
Implemented 'guessing' of initial POMDP schedulers, producing multiple guesses

tempestpy_adaptions
Tim Quatmann, 5 years ago
Commit 2f2a007896
1. src/storm-pomdp/builder/BeliefMdpExplorer.h (26 changed lines)
2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (19 changed lines)
3. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (7 changed lines)
4. src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h (202 changed lines)

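In short, the commit replaces the single pair of lower/upper bound vectors by a struct holding several candidate vectors, one per guessed scheduler, so that per-state queries can pick the tightest bound. A minimal self-contained sketch of that idea (simplified types, not the Storm API):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Simplified stand-in for storm::pomdp::modelchecker::TrivialPomdpValueBounds.
    struct Bounds {
        std::vector<std::vector<double>> lower;  // one value vector per guessed scheduler
        std::vector<std::vector<double>> upper;

        // Tightest lower bound for a state: the maximum over all guesses.
        double highestLowerBound(std::uint64_t state) const {
            assert(!lower.empty());
            double result = lower.front()[state];
            for (auto const& vec : lower) {
                result = std::max(result, vec[state]);
            }
            return result;
        }

        // Tightest upper bound for a state: the minimum over all guesses.
        double smallestUpperBound(std::uint64_t state) const {
            assert(!upper.empty());
            double result = upper.front()[state];
            for (auto const& vec : upper) {
                result = std::min(result, vec[state]);
            }
            return result;
        }
    };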
src/storm-pomdp/builder/BeliefMdpExplorer.h (26 changed lines)

@@ -14,6 +14,7 @@
#include "storm/storage/SparseMatrix.h"
#include "storm/utility/macros.h"
#include "storm-pomdp/storage/BeliefManager.h"
#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"
#include "storm/utility/SignalHandler.h"
#include "storm/modelchecker/results/CheckResult.h"
#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
@@ -38,7 +39,7 @@ namespace storm {
ModelChecked
};
-BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds), status(Status::Uninitialized) {
+BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds) : beliefManager(beliefManager), pomdpValueBounds(pomdpValueBounds), status(Status::Uninitialized) {
// Intentionally left empty
}
BeliefMdpExplorer(BeliefMdpExplorer&& other) = default;
@@ -544,16 +545,24 @@ namespace storm {
return upperValueBounds[getCurrentMdpState()];
}
/// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for minimizing properties)
/// Or that the pomdpLowerValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid.
ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const {
-return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds);
+STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(), "Requested lower value bounds but none were available.");
+auto it = pomdpValueBounds.lower.begin();
+ValueType result = beliefManager->getWeightedSum(beliefId, *it);
+for (++it; it != pomdpValueBounds.lower.end(); ++it) {
+    result = std::max(result, beliefManager->getWeightedSum(beliefId, *it));
+}
+return result;
}
/// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for maximizing properties)
/// Or that the pomdpUpperValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid.
ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const {
-return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
+STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(), "Requested upper value bounds but none were available.");
+auto it = pomdpValueBounds.upper.begin();
+ValueType result = beliefManager->getWeightedSum(beliefId, *it);
+for (++it; it != pomdpValueBounds.upper.end(); ++it) {
+    result = std::min(result, beliefManager->getWeightedSum(beliefId, *it));
+}
+return result;
}
void computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) {
@@ -817,8 +826,7 @@ namespace storm {
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
// Value and scheduler related information
-std::vector<ValueType> const& pomdpLowerValueBounds;
-std::vector<ValueType> const& pomdpUpperValueBounds;
+storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> pomdpValueBounds;
std::vector<ValueType> lowerValueBounds;
std::vector<ValueType> upperValueBounds;
std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has been performed

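The belief-level queries above work the same way as the per-state ones, except that each candidate vector is first folded into the belief via a weighted sum. A hedged sketch, with weightedSum standing in for BeliefManager::getWeightedSum (a belief is a probability distribution over POMDP states):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Stand-in for BeliefManager::getWeightedSum: the expectation of the
    // per-state bounds under the belief distribution.
    double weightedSum(std::vector<double> const& belief, std::vector<double> const& bounds) {
        double sum = 0.0;
        for (std::size_t s = 0; s < belief.size(); ++s) {
            sum += belief[s] * bounds[s];
        }
        return sum;
    }

    // Mirrors computeLowerValueBoundAtBelief: the best (largest) lower bound
    // over all guessed schedulers; upper bounds take the minimum instead.
    // Assumes lower is non-empty, as the assertion in the real code enforces.
    double lowerBoundAtBelief(std::vector<double> const& belief,
                              std::vector<std::vector<double>> const& lower) {
        double result = weightedSum(belief, lower.front());
        for (auto const& vec : lower) {
            result = std::max(result, weightedSum(belief, vec));
        }
        return result;
    }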
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (19 changed lines)

@@ -87,7 +87,8 @@ namespace storm {
// Compute some initial bounds on the values for each state of the pomdp
auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
-Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]);
+uint64_t initialPomdpState = pomdp.getInitialStates().getNextSetIndex(0);
+Result result(initialPomdpValueBounds.getHighestLowerBound(initialPomdpState), initialPomdpValueBounds.getSmallestUpperBound(initialPomdpState));
STORM_PRINT_AND_LOG("Initial value bounds are [" << result.lowerBound << ", " << result.upperBound << "]" << std::endl);
boost::optional<std::string> rewardModelName;
@@ -112,9 +113,9 @@ namespace storm {
}
if (options.refine) {
-refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
+refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds, result);
} else {
-computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
+computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds, result);
}
// "clear" results in case they were actually not requested (this will make the output a bit more clear)
if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) {
@@ -187,7 +188,7 @@ namespace storm {
}
template<typename PomdpModelType, typename BeliefValueType>
-void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
+void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result) {
if (options.discretize) {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.resolutionInit);
@@ -195,7 +196,7 @@ namespace storm {
if (rewardModelName) {
manager->setRewardModel(rewardModelName);
}
-auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
+auto approx = std::make_shared<ExplorerType>(manager, pomdpValueBounds);
HeuristicParameters heuristicParameters;
heuristicParameters.gapThreshold = options.gapThresholdInit;
heuristicParameters.observationThreshold = options.obsThresholdInit; // Actually not relevant without refinement
@@ -215,7 +216,7 @@ namespace storm {
if (rewardModelName) {
manager->setRewardModel(rewardModelName);
}
-auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
+auto approx = std::make_shared<ExplorerType>(manager, pomdpValueBounds);
HeuristicParameters heuristicParameters;
heuristicParameters.gapThreshold = options.gapThresholdInit;
heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
@@ -239,7 +240,7 @@ namespace storm {
}
template<typename PomdpModelType, typename BeliefValueType>
-void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
+void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result) {
statistics.refinementSteps = 0;
// Set up exploration data
@@ -253,7 +254,7 @@ namespace storm {
if (rewardModelName) {
overApproxBeliefManager->setRewardModel(rewardModelName);
}
-overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, pomdpValueBounds);
overApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
overApproxHeuristicPar.observationThreshold = options.obsThresholdInit;
overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit;
@@ -277,7 +278,7 @@ namespace storm {
if (rewardModelName) {
underApproxBeliefManager->setRewardModel(rewardModelName);
}
-underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, pomdpValueBounds);
underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;

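At the call site above, the initial result interval now combines all guesses at the initial state via getHighestLowerBound and getSmallestUpperBound. An illustrative, self-contained example of the effect (made-up numbers, not Storm output):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
        // Two guessed bound vectors per direction for a two-state POMDP.
        std::vector<std::vector<double>> lower = {{0.10, 0.00}, {0.30, 0.20}};
        std::vector<std::vector<double>> upper = {{0.90, 1.00}, {0.70, 0.80}};
        std::uint64_t initialState = 0;
        double lo = lower.front()[initialState];
        double hi = upper.front()[initialState];
        for (auto const& v : lower) { lo = std::max(lo, v[initialState]); }
        for (auto const& v : upper) { hi = std::min(hi, v[initialState]); }
        // Prints [0.3, 0.7]: tighter than any single guess alone.
        std::cout << "Initial value bounds are [" << lo << ", " << hi << "]" << std::endl;
        return 0;
    }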
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (7 changed lines)

@@ -16,6 +16,9 @@ namespace storm {
namespace pomdp {
namespace modelchecker {
+template<typename ValueType>
+struct TrivialPomdpValueBounds;
template<typename PomdpModelType, typename BeliefValueType = typename PomdpModelType::ValueType>
class ApproximatePOMDPModelchecker {
public:
@@ -53,7 +56,7 @@ namespace storm {
* @param maxUaModelSize the maximum size of the underapproximation model to be generated
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/
-void computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result);
+void computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result);
/**
@@ -63,7 +66,7 @@ namespace storm {
* @param min true if minimum probability is to be computed
* @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/
-void refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result);
+void refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result);
struct HeuristicParameters {
ValueType gapThreshold;

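The header gets by with a forward declaration of TrivialPomdpValueBounds because the struct only appears by const reference in the signatures; the full definition is needed only in the .cpp file. A minimal sketch of the pattern:

    // Forward declaration: enough for declaring functions that take the type
    // by (const) reference, and it avoids a heavier #include in the header.
    namespace storm { namespace pomdp { namespace modelchecker {
        template<typename ValueType>
        struct TrivialPomdpValueBounds;
    }}}

    // This declaration compiles without the full definition of the struct.
    void useBounds(storm::pomdp::modelchecker::TrivialPomdpValueBounds<double> const& bounds);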
src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h (202 changed lines)

@@ -15,26 +15,112 @@
namespace storm {
namespace pomdp {
namespace modelchecker {
+template<typename ValueType>
+struct TrivialPomdpValueBounds {
+    std::vector<std::vector<ValueType>> lower;
+    std::vector<std::vector<ValueType>> upper;
+    ValueType getHighestLowerBound(uint64_t const& state) {
+        STORM_LOG_ASSERT(!lower.empty(), "requested a lower bound but none were available");
+        auto it = lower.begin();
+        ValueType result = (*it)[state];
+        for (++it; it != lower.end(); ++it) {
+            result = std::max(result, (*it)[state]);
+        }
+        return result;
+    }
+    ValueType getSmallestUpperBound(uint64_t const& state) {
+        STORM_LOG_ASSERT(!upper.empty(), "requested an upper bound but none were available");
+        auto it = upper.begin();
+        ValueType result = (*it)[state];
+        for (++it; it != upper.end(); ++it) {
+            result = std::min(result, (*it)[state]);
+        }
+        return result;
+    }
+};
template <typename PomdpType>
class TrivialPomdpValueBoundsModelChecker {
public:
typedef typename PomdpType::ValueType ValueType;
+typedef TrivialPomdpValueBounds<ValueType> ValueBounds;
TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) {
// Intentionally left empty
}
-struct ValueBounds {
-std::vector<ValueType> lower;
-std::vector<ValueType> upper;
-};
ValueBounds getValueBounds(storm::logic::Formula const& formula) {
return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula));
}
+std::vector<ValueType> getChoiceValues(std::vector<ValueType> const& stateValues, std::vector<ValueType>* actionBasedRewards) {
+    std::vector<ValueType> choiceValues(pomdp.getNumberOfChoices());
+    pomdp.getTransitionMatrix().multiplyWithVector(stateValues, choiceValues, actionBasedRewards);
+    return choiceValues;
+}
+std::vector<ValueType> computeValuesForGuessedScheduler(std::vector<ValueType> const& stateValues, std::vector<ValueType>* actionBasedRewards, storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info, std::shared_ptr<storm::models::sparse::Mdp<ValueType>> underlyingMdp, ValueType const& scoreThreshold, bool relativeScore) {
+    // Create some positional scheduler for the POMDP
+    storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
+    // For each state, we heuristically find a good distribution over output actions.
+    auto choiceValues = getChoiceValues(stateValues, actionBasedRewards);
+    auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
+    std::vector<storm::storage::Distribution<ValueType, uint_fast64_t>> choiceDistributions(pomdp.getNrObservations());
+    for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
+        auto& choiceDistribution = choiceDistributions[pomdp.getObservation(state)];
+        ValueType const& stateValue = stateValues[state];
+        assert(stateValue >= storm::utility::zero<ValueType>());
+        std::cout << state << ": " << stateValue << "\t t=" << scoreThreshold << " rel=" << relativeScore << std::endl;
+        for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) {
+            ValueType const& choiceValue = choiceValues[choice];
+            assert(choiceValue >= storm::utility::zero<ValueType>());
+            std::cout << "\t" << (choice - choiceIndices[state]) << ": " << choiceValue << std::endl;
+            // Rate this choice by considering the relative difference between the choice value and the (optimal) state value
+            // A high score shall mean that the choice is "good"
+            if (storm::utility::isInfinity(stateValue)) {
+                // For infinity states, we simply distribute uniformly.
+                // FIXME: This case could be handled a bit more sensible
+                choiceDistribution.addProbability(choice - choiceIndices[state], scoreThreshold);
+            } else {
+                ValueType choiceScore = info.minimize() ? (choiceValue - stateValue) : (stateValue - choiceValue);
+                if (relativeScore) {
+                    ValueType avg = (stateValue + choiceValue) / storm::utility::convertNumber<ValueType, uint64_t>(2);
+                    if (!storm::utility::isZero(avg)) {
+                        choiceScore /= avg;
+                    }
+                }
+                choiceScore = storm::utility::one<ValueType>() - choiceScore;
+                if (choiceScore >= scoreThreshold) {
+                    choiceDistribution.addProbability(choice - choiceIndices[state], choiceScore);
+                }
+            }
+        }
+        STORM_LOG_ASSERT(choiceDistribution.size() > 0, "Empty choice distribution.");
+    }
+    // Normalize all distributions
+    for (auto& choiceDistribution : choiceDistributions) {
+        choiceDistribution.normalize();
+    }
+    // Set the scheduler for all states
+    for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
+        pomdpScheduler.setChoice(choiceDistributions[pomdp.getObservation(state)], state);
+    }
+    STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler.");
+    auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false);
+    auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
+    STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained.");
+    STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
+    std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
+    return pomdpSchedulerResult;
+}
ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) {
STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis.");
// Compute the values on the fully observable MDP
-// We need an actual MDP here so that the apply scheduler method below will work.
+// We need an actual MDP so that we can apply schedulers below.
// Also, the api call in the next line will require a copy anyway.
auto underlyingMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels());
auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(underlyingMdp, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
@@ -42,70 +128,76 @@ namespace storm {
STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
std::vector<ValueType> fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
-// Create some positional scheduler for the POMDP
-storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
-// For each state, we heuristically find a good distribution over output actions.
-std::vector<ValueType> fullyObservableChoiceValues(pomdp.getNumberOfChoices());
+std::vector<ValueType> actionBasedRewards;
+std::vector<ValueType>* actionBasedRewardsPtr = nullptr;
if (info.isNonNestedExpectedRewardFormula()) {
-std::vector<ValueType> actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix());
-pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues, &actionBasedRewards);
-} else {
-pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues);
+actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix());
+actionBasedRewardsPtr = &actionBasedRewards;
}
-auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
-for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) {
-auto obsStates = pomdp.getStatesWithObservation(obs);
-storm::storage::Distribution<ValueType, uint_fast64_t> choiceDistribution;
-for (auto const &state : obsStates) {
-ValueType const& stateValue = fullyObservableResult[state];
-assert(stateValue >= storm::utility::zero<ValueType>());
-for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) {
-ValueType const& choiceValue = fullyObservableChoiceValues[choice];
-assert(choiceValue >= storm::utility::zero<ValueType>());
-// Rate this choice by considering the relative difference between the choice value and the (optimal) state value
-ValueType choiceRating;
-if (stateValue < choiceValue) {
-choiceRating = choiceValue - stateValue;
-if (!storm::utility::isZero(choiceValue)) {
-choiceRating /= choiceValue;
-}
+std::vector<std::vector<ValueType>> guessedSchedulerValues;
+std::vector<std::pair<double, bool>> guessParameters({{0.875,false},{0.875,true},{0.75,false},{0.75,true}});
+for (auto const& pars : guessParameters) {
+guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(fullyObservableResult, actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(pars.first), pars.second));
+}
+// compute the 'best' guess and do a few iterations on it
+uint64_t bestGuess = 0;
+ValueType bestGuessSum = std::accumulate(guessedSchedulerValues.front().begin(), guessedSchedulerValues.front().end(), storm::utility::zero<ValueType>());
+for (uint64_t guess = 1; guess < guessedSchedulerValues.size(); ++guess) {
+ValueType guessSum = std::accumulate(guessedSchedulerValues[guess].begin(), guessedSchedulerValues[guess].end(), storm::utility::zero<ValueType>());
+if ((info.minimize() && guessSum < bestGuessSum) || (info.maximize() && guessSum > bestGuessSum)) {
+bestGuess = guess;
+bestGuessSum = guessSum;
+}
+}
+guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues[bestGuess], actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first), guessParameters[bestGuess].second));
+guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues.back(), actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first), guessParameters[bestGuess].second));
+guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues.back(), actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first), guessParameters[bestGuess].second));
+// Check if one of the guesses is worse than one of the others (and potentially delete it)
+// Avoid deleting entries during the loop to ensure that indices remain valid
+storm::storage::BitVector keptGuesses(guessedSchedulerValues.size(), true);
+for (uint64_t i = 0; i < guessedSchedulerValues.size() - 1; ++i) {
+if (!keptGuesses.get(i)) {
+continue;
+}
+for (uint64_t j = i + 1; j < guessedSchedulerValues.size(); ++j) {
+if (!keptGuesses.get(j)) {
+continue;
+}
+if (storm::utility::vector::compareElementWise(guessedSchedulerValues[i], guessedSchedulerValues[j], std::less_equal<ValueType>())) {
+if (info.minimize()) {
+// In this case we are guessing upper bounds (and smaller upper bounds are better)
+keptGuesses.set(j, false);
} else {
-choiceRating = stateValue - choiceValue;
-if (!storm::utility::isZero(stateValue)) {
-choiceRating /= stateValue;
-}
+// In this case we are guessing lower bounds (and larger lower bounds are better)
+keptGuesses.set(i, false);
+break;
}
-assert(choiceRating <= storm::utility::one<ValueType>());
-assert(choiceRating >= storm::utility::zero<ValueType>());
-// choiceRating = 0 is a very good choice, choiceRating = 1 is a very bad choice
-if (choiceRating <= storm::utility::convertNumber<ValueType>(0.5)) {
-choiceDistribution.addProbability(choice - choiceIndices[state], storm::utility::one<ValueType>() - choiceRating);
+} else if (storm::utility::vector::compareElementWise(guessedSchedulerValues[j], guessedSchedulerValues[i], std::less_equal<ValueType>())) {
+if (info.minimize()) {
+keptGuesses.set(i, false);
+break;
+} else {
+keptGuesses.set(j, false);
+}
}
}
-choiceDistribution.normalize();
-for (auto const& state : obsStates) {
-pomdpScheduler.setChoice(choiceDistribution, state);
-}
}
-STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler.");
-auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false);
-auto resultPtr2 = storm::api::verifyWithSparseEngine<ValueType>(scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
-STORM_LOG_THROW(resultPtr2, storm::exceptions::UnexpectedException, "No check result obtained.");
-STORM_LOG_THROW(resultPtr2->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
-std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr2->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
std::cout << "Keeping scheduler guesses " << keptGuesses << std::endl;
storm::utility::vector::filterVectorInPlace(guessedSchedulerValues, keptGuesses);
// Finally prepare the result
ValueBounds result;
if (info.minimize()) {
-result.lower = std::move(fullyObservableResult);
-result.upper = std::move(pomdpSchedulerResult);
+result.lower.push_back(std::move(fullyObservableResult));
+result.upper = std::move(guessedSchedulerValues);
} else {
-result.lower = std::move(pomdpSchedulerResult);
-result.upper = std::move(fullyObservableResult);
+result.lower = std::move(guessedSchedulerValues);
+result.upper.push_back(std::move(fullyObservableResult));
}
-STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower, result.upper, std::less_equal<ValueType>()), "Lower bound is larger than upper bound");
+STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower.front(), result.upper.front(), std::less_equal<ValueType>()), "Lower bound is larger than upper bound");
return result;
}
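The heart of the guessing heuristic is the per-choice score computed in computeValuesForGuessedScheduler. A condensed sketch of that rating logic (simplified to double, function name illustrative):

    // Rate a choice against the optimal state value of the fully observable MDP:
    // a score of 1 means the choice is optimal, smaller scores mean worse choices.
    // With relativeScore, the gap is normalized by the average of the two values.
    double scoreChoice(double stateValue, double choiceValue, bool minimize, bool relativeScore) {
        // For minimizing properties the choice value is at least the state value,
        // for maximizing ones at most, so the gap below is always nonnegative.
        double gap = minimize ? (choiceValue - stateValue) : (stateValue - choiceValue);
        if (relativeScore) {
            double avg = (stateValue + choiceValue) / 2.0;
            if (avg != 0.0) {
                gap /= avg;
            }
        }
        return 1.0 - gap;
    }

    // A choice enters the guessed (randomized, observation-based) scheduler with
    // weight scoreChoice(...) iff the score reaches the threshold, e.g. 0.875 or
    // 0.75 as in guessParameters; weights are then normalized per observation.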