diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index cb68794de..90fa96039 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -2,7 +2,13 @@ #include +#include "storm-pomdp/analysis/FormulaInformation.h" + #include "storm/utility/ConstantsComparator.h" +#include "storm/utility/NumberTraits.h" +#include "storm/utility/graph.h" +#include "storm/logic/Formulas.h" + #include "storm/models/sparse/Dtmc.h" #include "storm/models/sparse/StandardRewardModel.h" #include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h" @@ -15,23 +21,65 @@ #include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" +#include "storm/utility/macros.h" +#include "storm/exceptions/NotSupportedException.h" + namespace storm { namespace pomdp { namespace modelchecker { template - ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker() { - precision = 0.000000001; - cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(precision), false); + ApproximatePOMDPModelchecker::Options::Options() { + initialGridResolution = 10; + explorationThreshold = storm::utility::zero(); + doRefinement = true; + refinementPrecision = storm::utility::convertNumber(1e-4); + numericPrecision = storm::NumberTraits::IsExact ? storm::utility::zero() : storm::utility::convertNumber(1e-9); + } + + template + ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options) : pomdp(pomdp), options(options) { + cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(this->options.numericPrecision), false); useMdp = true; maxIterations = 1000; cacheSubsimplices = false; } + template + std::unique_ptr> ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { + auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); + if (formulaInfo.isNonNestedReachabilityProbability()) { + // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. + STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); + if (!formulaInfo.getSinkStates().empty()) { + auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states); + reachableFromSinkStates &= ~formulaInfo.getSinkStates().states; + STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported"); + } + if (options.doRefinement) { + return refineReachabilityProbability(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + } else { + return computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + } + } else if (formulaInfo.isNonNestedExpectedRewardFormula()) { + // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. + STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); + if (options.doRefinement) { + STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Rewards with refinement not implemented yet"); + //return refineReachabilityProbability(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + } else { + // FIXME: pick the non-unique reward model here + STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet."); + return computeReachabilityRewardOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + } + } else { + STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); + } + } + + template std::unique_ptr> - ApproximatePOMDPModelchecker::refineReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, uint64_t gridResolution, - double explorationThreshold) { + ApproximatePOMDPModelchecker::refineReachabilityProbability(std::set const &targetObservations, bool min) { std::srand(time(NULL)); // Compute easy upper and lower bounds storm::utility::Stopwatch underlyingWatch(true); @@ -88,14 +136,13 @@ namespace storm { // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution. // This can probably be improved (i.e. resolutions for single belief states) - STORM_PRINT("Initial Resolution: " << gridResolution << std::endl) - std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); + STORM_PRINT("Initial Resolution: " << options.initialGridResolution << std::endl) + std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); std::set changedObservations; uint64_t underApproxModelSize = 200; uint64_t refinementCounter = 1; STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl) - std::shared_ptr> res = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, false, - explorationThreshold, initialOverApproxMap, underApproxMap, underApproxModelSize); + std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, false, initialOverApproxMap, underApproxMap, underApproxModelSize); ValueType lastMinScore = storm::utility::infinity(); while (refinementCounter < 1000) { // TODO the actual refinement @@ -169,7 +216,7 @@ namespace storm { } STORM_PRINT( "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl) - res = computeRefinementStep(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold, + res = computeRefinementStep(targetObservations, min, observationResolutionVector, false, res, changedObservations, initialOverApproxMap, underApproxMap, underApproxModelSize); //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot"); if (cc.isEqual(res->overApproxValue, res->underApproxValue)) { @@ -183,15 +230,14 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, + ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, double explorationThreshold, + bool computeRewards, boost::optional> overApproximationMap, boost::optional> underApproximationMap, uint64_t maxUaModelSize) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - auto result = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, computeRewards, explorationThreshold, overApproximationMap, + auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, overApproximationMap, underApproximationMap, maxUaModelSize); return std::make_unique>(POMDPCheckResult{result->overApproxValue, result->underApproxValue}); } @@ -199,10 +245,9 @@ namespace storm { template std::shared_ptr> - ApproximatePOMDPModelchecker::computeFirstRefinementStep(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, + ApproximatePOMDPModelchecker::computeFirstRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, double explorationThreshold, + bool computeRewards, boost::optional> overApproximationMap, boost::optional> underApproximationMap, uint64_t maxUaModelSize) { @@ -229,7 +274,7 @@ namespace storm { uint64_t nextId = 0; storm::utility::Stopwatch expansionTimer(true); // Initial belief always has belief ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); + storm::pomdp::Belief initialBelief = getInitialBelief(nextId); ++nextId; beliefList.push_back(initialBelief); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); @@ -323,15 +368,16 @@ namespace storm { //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting // Expand the beliefs to generate the grid on-the-fly - if (explorationThreshold > 0) { - STORM_PRINT("Exploration threshold: " << explorationThreshold << std::endl) + if (options.explorationThreshold > storm::utility::zero()) { + STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } while (!beliefsToBeExpanded.empty()) { uint64_t currId = beliefsToBeExpanded.front(); beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; - - if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(explorationThreshold))) { + + if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) { + // TODO: with rewards whe would have to assign the corresponding reward to this transition mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one() - weightedSumOverMap[currId]}}}); continue; } @@ -348,13 +394,13 @@ namespace storm { std::vector> transitionsInBelief; for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currId], action); std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, + uint64_t idNextBelief = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, observation, nextId); nextId = beliefList.size(); //Triangulate here and put the possibly resulting belief in the grid @@ -415,7 +461,7 @@ namespace storm { } } if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), beliefList[currId]); } if (!transitionInActionBelief.empty()) { @@ -456,7 +502,7 @@ namespace storm { for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { // Add the reward mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), currentBelief)); } } @@ -487,7 +533,7 @@ namespace storm { STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, + auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); @@ -500,10 +546,9 @@ namespace storm { template std::shared_ptr> - ApproximatePOMDPModelchecker::computeRefinementStep(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, + ApproximatePOMDPModelchecker::computeRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, double explorationThreshold, + bool computeRewards, std::shared_ptr> refinementComponents, std::set changedObservations, boost::optional> overApproximationMap, @@ -543,12 +588,12 @@ namespace storm { for (auto const &stateActionPair : statesAndActionsToCheck) { auto currId = refinementComponents->overApproxBeliefStateMap.right.at(stateActionPair.first); auto action = stateActionPair.second; - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, refinementComponents->beliefList[currId], + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], action); std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; - uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, refinementComponents->beliefList, refinementComponents->beliefIsTarget, + uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); nextBeliefId = refinementComponents->beliefList.size(); //Triangulate here and put the possibly resulting belief in the grid @@ -604,7 +649,7 @@ namespace storm { } /* TODO if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), refinementComponents->beliefList[currId]); }*/ if (!transitionInActionBelief.empty()) { @@ -618,7 +663,7 @@ namespace storm { bool isTarget = refinementComponents->beliefIsTarget[currId]; /* TODO - if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(explorationThreshold))) { + if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(options.explorationThreshold))) { mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one() - weightedSumOverMap[currId]}}}); continue; }*/ @@ -634,15 +679,13 @@ namespace storm { std::vector actionRewardsInState(numChoices); for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, - refinementComponents->beliefList[currId], - action); + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], action); std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, refinementComponents->beliefList, refinementComponents->beliefIsTarget, + uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); nextBeliefId = refinementComponents->beliefList.size(); @@ -699,7 +742,7 @@ namespace storm { } /* if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), beliefList[currId]); }*/ if (!transitionInActionBelief.empty()) { @@ -785,7 +828,7 @@ namespace storm { STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(pomdp, refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, + auto underApproxComponents = computeUnderapproximation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->initialBeliefId, min, computeRewards, maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); @@ -799,8 +842,7 @@ namespace storm { template ValueType - ApproximatePOMDPModelchecker::overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, + ApproximatePOMDPModelchecker::overApproximationValueIteration(std::vector> &beliefList, std::vector> &beliefGrid, std::vector &beliefIsTarget, std::map>> &observationProbabilities, @@ -915,20 +957,16 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution) { - std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); - return computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, true, 0); + ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(std::set const &targetObservations, bool min) { + std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); + return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true); } template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution, double explorationThreshold) { - std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); - return computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold); + ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(std::set const &targetObservations, bool min) { + std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); + return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); } template @@ -942,13 +980,13 @@ namespace storm { std::vector beliefIsTarget; uint64_t nextId = 0; // Initial belief always has ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); + storm::pomdp::Belief initialBelief = getInitialBelief(nextId); ++nextId; beliefList.push_back(initialBelief); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); std::vector> beliefGrid; - constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId); + constructBeliefGrid(targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId); nextId = beliefList.size(); beliefGridTimer.stop(); @@ -993,20 +1031,20 @@ namespace storm { std::vector actionRewardsInState(numChoices); for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action); + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(currentBelief, action); std::map actionObservationBelieves; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, currentBelief, + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, currentBelief, action, observation, nextId); nextId = beliefList.size(); } observationProbabilitiesInAction[action] = actionObservationProbabilities; nextBelievesInAction[action] = actionObservationBelieves; if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), currentBelief); } } @@ -1022,14 +1060,14 @@ namespace storm { STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) // Value Iteration - auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, + auto overApprox = overApproximationValueIteration(beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, subSimplexCache, lambdaCache, result, chosenActions, gridResolution, min, computeRewards); overApproxTimer.stop(); // Now onto the under-approximation storm::utility::Stopwatch underApproxTimer(true); - /*ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + /*ValueType underApprox = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp);*/ underApproxTimer.stop(); auto underApprox = storm::utility::zero(); @@ -1062,8 +1100,7 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, + ApproximatePOMDPModelchecker::computeUnderapproximation(std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, uint64_t initialBeliefId, bool min, @@ -1101,10 +1138,10 @@ namespace storm { //TODO add a way to extract the actions from the over-approx and use them here? for (uint64_t action = 0; action < numChoices; ++action) { std::map transitionsInStateWithAction; - std::map observationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currentBeliefId], action); + std::map observationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currentBeliefId], action); for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) { uint32_t observation = iter->first; - uint64_t nextBeliefId = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId], + uint64_t nextBeliefId = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId], action, observation, nextId); nextId = beliefList.size(); @@ -1146,7 +1183,7 @@ namespace storm { for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { // Add the reward rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), currentBelief)); } } @@ -1231,8 +1268,7 @@ namespace storm { } template - storm::pomdp::Belief ApproximatePOMDPModelchecker::getInitialBelief( - storm::models::sparse::Pomdp const &pomdp, uint64_t id) { + storm::pomdp::Belief ApproximatePOMDPModelchecker::getInitialBelief(uint64_t id) { STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, "POMDP contains more than one initial state"); STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, @@ -1251,7 +1287,6 @@ namespace storm { template void ApproximatePOMDPModelchecker::constructBeliefGrid( - storm::models::sparse::Pomdp const &pomdp, std::set const &target_observations, uint64_t gridResolution, std::vector> &beliefList, std::vector> &grid, std::vector &beliefIsTarget, @@ -1399,7 +1434,6 @@ namespace storm { template std::map ApproximatePOMDPModelchecker::computeObservationProbabilitiesAfterAction( - storm::models::sparse::Pomdp const &pomdp, storm::pomdp::Belief &belief, uint64_t actionIndex) { std::map res; @@ -1428,8 +1462,7 @@ namespace storm { template storm::pomdp::Belief - ApproximatePOMDPModelchecker::getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, - storm::pomdp::Belief &belief, uint64_t actionIndex, uint64_t id) { + ApproximatePOMDPModelchecker::getBeliefAfterAction(storm::pomdp::Belief &belief, uint64_t actionIndex, uint64_t id) { std::map distributionAfter; uint32_t observation = 0; for (auto const &probEntry : belief.probabilities) { @@ -1446,8 +1479,7 @@ namespace storm { } template - uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( - storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, + uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation(std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief &belief, uint64_t actionIndex, uint32_t observation, uint64_t id) { std::map distributionAfter; @@ -1480,8 +1512,7 @@ namespace storm { } template - ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, - uint64_t action, storm::pomdp::Belief &belief) { + ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief) { auto result = storm::utility::zero(); for (size_t i = 0; i < belief.probabilities.size(); ++i) { for (auto const &probEntry : belief.probabilities) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index da6451db5..3cd5f06ba 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -8,6 +8,10 @@ #include "storm/storage/jani/Property.h" namespace storm { + namespace logic { + class Formula; + } + namespace pomdp { namespace modelchecker { typedef boost::bimap bsmap_type; @@ -47,53 +51,54 @@ namespace storm { template> class ApproximatePOMDPModelchecker { public: - explicit ApproximatePOMDPModelchecker(); + + struct Options { + Options(); + uint64_t initialGridResolution; /// Decides how precise the bounds are + ValueType explorationThreshold; /// the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state + bool doRefinement; /// Sets whether the bounds should be refined automatically until the refinement precision is reached + ValueType refinementPrecision; /// Used to decide when the refinement should terminate + ValueType numericPrecision; /// Used to decide whether two values are equal + }; + + ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options = Options()); + + std::unique_ptr> check(storm::logic::Formula const& formula); + private: /** * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop * - * @param pomdp the POMDP to be checked * @param targetObservations the set of observations to be reached * @param min true if minimum probability is to be computed - * @param gridResolution the initial grid resolution - * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> - refineReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, - uint64_t gridResolution, double explorationThreshold); + refineReachabilityProbability(std::set const &targetObservations, bool min); /** * Compute the reachability probability of given target observations on a POMDP for the given resolution only. * On-the-fly state space generation is used for the overapproximation * - * @param pomdp the POMDP to be checked * @param targetObservations the set of observations to be reached * @param min true if minimum probability is to be computed - * @param gridResolution the grid resolution - * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> - computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution, double explorationThreshold); + computeReachabilityProbabilityOTF(std::set const &targetObservations, bool min); /** * Compute the reachability rewards for given target observations on a POMDP for the given resolution only. * On-the-fly state space generation is used for the overapproximation * - * @param pomdp the POMDP to be checked * @param targetObservations the set of observations to be reached * @param min true if minimum rewards are to be computed - * @param gridResolution the initial grid resolution - * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> - computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, - uint64_t gridResolution); + computeReachabilityRewardOTF(std::set const &targetObservations, bool min); + // TODO: Check if this is obsolete /** * Compute the reachability probability for given target observations on a POMDP for the given resolution only. * Static state space generation is used for the overapproximation, i.e. the whole grid is generated @@ -109,11 +114,11 @@ namespace storm { std::set const &targetObservations, bool min, uint64_t gridResolution); + // TODO: Check if this is obsolete /** * Compute the reachability rewards for given target observations on a POMDP for the given resolution only. * Static state space generation is used for the overapproximation, i.e. the whole grid is generated * - * @param pomdp the POMDP to be checked * @param targetObservations the set of observations to be reached * @param min true if the minimum rewards are to be computed * @param gridResolution the initial grid resolution @@ -128,27 +133,23 @@ namespace storm { /** * Helper method to compute the inital step of the refinement loop * - * @param pomdp the pomdp to be checked * @param targetObservations set of target observations * @param min true if minimum value is to be computed * @param observationResolutionVector vector containing the resolution to be used for each observation * @param computeRewards true if rewards are to be computed, false if probability is computed - * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value * @param maxUaModelSize the maximum size of the underapproximation model to be generated * @return struct containing components generated during the computation to be used in later refinement iterations */ std::shared_ptr> - computeFirstRefinementStep(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, double explorationThreshold, boost::optional> overApproximationMap = boost::none, + computeFirstRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, + bool computeRewards, boost::optional> overApproximationMap = boost::none, boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); std::shared_ptr> - computeRefinementStep(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, double explorationThreshold, std::shared_ptr> refinementComponents, + computeRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, + bool computeRewards, std::shared_ptr> refinementComponents, std::set changedObservations, boost::optional> overApproximationMap = boost::none, boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); @@ -156,28 +157,25 @@ namespace storm { /** * Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size * - * @param pomdp the pomdp to be checked * @param targetObservations set of target observations * @param min true if minimum value is to be computed * @param observationResolutionVector vector containing the resolution to be used for each observation * @param computeRewards true if rewards are to be computed, false if probability is computed - * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value * @param maxUaModelSize the maximum size of the underapproximation model to be generated * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> - computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, + computeReachabilityOTF(std::set const &targetObservations, bool min, + std::vector &observationResolutionVector, bool computeRewards, boost::optional> overApproximationMap = boost::none, boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); + // TODO: Check if this is obsolete /** * Helper method to compute reachability properties using static state space generation * - * @param pomdp the POMDP to be checked * @param targetObservations set of target observations * @param min true if minimum value is to be computed * @param gridResolution the resolution of the grid to be used @@ -193,7 +191,6 @@ namespace storm { * Helper to compute an underapproximation of the reachability property. * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. * - * @param pomdp the POMDP to be checked * @param beliefList vector containing already generated beliefs * @param beliefIsTarget vector containinf for each belief in beliefList true if the belief is a target * @param targetObservations set of target observations @@ -203,8 +200,7 @@ namespace storm { * @param maxModelSize number of states up until which the belief support should be unrolled * @return struct containing the components generated during the under approximation */ - std::unique_ptr> computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, + std::unique_ptr> computeUnderapproximation(std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, uint64_t initialBeliefId, bool min, bool computeReward, @@ -218,7 +214,7 @@ namespace storm { * @return a belief representing the initial belief */ storm::pomdp::Belief - getInitialBelief(storm::models::sparse::Pomdp const &pomdp, uint64_t id); + getInitialBelief(uint64_t id); /** @@ -236,7 +232,6 @@ namespace storm { /** * Helper method to construct the static belief grid for the POMDP overapproximation * - * @param pomdp the POMDP to be approximated * @param target_observations set of target observations * @param gridResolution the resolution of the grid to be constructed * @param beliefList data structure to store all generated beliefs @@ -244,8 +239,7 @@ namespace storm { * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief * @param nextId the ID to be used for the next generated belief */ - void constructBeliefGrid(storm::models::sparse::Pomdp const &pomdp, - std::set const &target_observations, uint64_t gridResolution, + void constructBeliefGrid(std::set const &target_observations, uint64_t gridResolution, std::vector> &beliefList, std::vector> &grid, std::vector &beliefIsTarget, uint64_t nextId); @@ -254,21 +248,17 @@ namespace storm { /** * Helper method to get the probabilities to be in a state with each observation after performing an action * - * @param pomdp the POMDP * @param belief the belief in which the action is performed * @param actionIndex the index of the action to be performed * @return mapping from each observation to the probability to be in a state with that observation after performing the action */ - std::map computeObservationProbabilitiesAfterAction( - storm::models::sparse::Pomdp const &pomdp, - storm::pomdp::Belief &belief, + std::map computeObservationProbabilitiesAfterAction(storm::pomdp::Belief &belief, uint64_t actionIndex); /** * Helper method to get the id of the next belief that results from a belief by performing an action and observing an observation. * If the belief does not exist yet, it is created and added to the list of all beliefs * - * @param pomdp the POMDP on which the evaluation should be performed * @param beliefList data structure to store all generated beliefs * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief * @param targetObservations set of target observations @@ -277,9 +267,7 @@ namespace storm { * @param observation the observation after the action was performed * @return the resulting belief (observation and distribution) */ - uint64_t getBeliefAfterActionAndObservation( - storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, + uint64_t getBeliefAfterActionAndObservation(std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief &belief, @@ -288,15 +276,13 @@ namespace storm { /** * Helper method to generate the next belief that results from a belief by performing an action * - * @param pomdp the POMDP * @param belief the starting belief * @param actionIndex the index of the action to be performed * @param id the ID for the generated belief * @return a belief object representing the belief after performing the action in the starting belief */ storm::pomdp::Belief - getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, storm::pomdp::Belief &belief, uint64_t actionIndex, - uint64_t id); + getBeliefAfterAction(storm::pomdp::Belief &belief, uint64_t actionIndex, uint64_t id); /** * Helper to get the id of a Belief stored in a given vector structure @@ -320,12 +306,11 @@ namespace storm { /** * Get the reward for performing an action in a given belief * - * @param pomdp the POMDP * @param action the index of the action to be performed * @param belief the belief in which the action is performed * @return the reward earned by performing the action in the belief */ - ValueType getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, storm::pomdp::Belief &belief); + ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief); /** @@ -349,7 +334,7 @@ namespace storm { * @return the resulting probability/reward in the initial state */ ValueType - overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, + overApproximationValueIteration(std::vector> &beliefList, std::vector> &beliefGrid, std::vector &beliefIsTarget, std::map>> &observationProbabilities, std::map>> &nextBelieves, @@ -359,8 +344,10 @@ namespace storm { std::map> &chosenActions, uint64_t gridResolution, bool min, bool computeRewards); + storm::models::sparse::Pomdp const& pomdp; + Options options; storm::utility::ConstantsComparator cc; - double precision; + // TODO: these should be obsolete, right? bool useMdp; bool cacheSubsimplices; uint64_t maxIterations;