diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index 107f699ae..33e1d1f51 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -6,6 +6,7 @@
 #include <memory>
 #include <vector>
+#include "storm-parsers/api/properties.h"
 #include "storm/api/properties.h"
 #include "storm/api/verification.h"
@@ -13,20 +14,25 @@
 #include "storm/utility/macros.h"
 #include "storm-pomdp/storage/BeliefManager.h"
 #include "storm/utility/SignalHandler.h"
+#include "storm/modelchecker/results/CheckResult.h"
+#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
+#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
+#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp"
 
 namespace storm {
     namespace builder {
-        template<typename PomdpType>
+        template<typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType>
         class BeliefMdpExplorer {
         public:
             typedef typename PomdpType::ValueType ValueType;
-            typedef storm::storage::BeliefManager<PomdpType> BeliefManagerType;
+            typedef storm::storage::BeliefManager<PomdpType, BeliefValueType> BeliefManagerType;
             typedef typename BeliefManagerType::BeliefId BeliefId;
             typedef uint64_t MdpStateType;
 
             BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) {
                 // Intentionally left empty
             }
+            BeliefMdpExplorer(BeliefMdpExplorer&& other) = default;
 
             void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
                 // Reset data from potential previous explorations
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 9c06c4c06..436fc3e09 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -15,13 +15,8 @@
 #include "storm/models/sparse/StandardRewardModel.h"
 #include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h"
 #include "storm/utility/vector.h"
-#include "storm/modelchecker/results/CheckResult.h"
-#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
-#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
-#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp"
 #include "storm/api/properties.h"
 #include "storm/api/export.h"
-#include "storm-parsers/api/storm-parsers.h"
 #include "storm-pomdp/builder/BeliefMdpExplorer.h"
 #include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"
@@ -32,8 +27,8 @@
 namespace storm {
     namespace pomdp {
         namespace modelchecker {
-            template<typename ValueType, typename RewardModelType>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::Options::Options() {
+            template<typename PomdpModelType, typename BeliefValueType>
+            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Options::Options() {
                 initialGridResolution = 10;
                 explorationThreshold = storm::utility::zero<ValueType>();
                 doRefinement = true;
@@ -41,61 +36,78 @@ namespace storm {
                 numericPrecision = storm::NumberTraits<ValueType>::IsExact ? storm::utility::zero<ValueType>() : storm::utility::convertNumber<ValueType>(1e-9);
                 cacheSubsimplices = false;
             }
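// Editor's sketch (not part of the patch): the defaults set above can be tuned
// before constructing the checker; the option names are taken from the Options
// struct in ApproximatePOMDPModelchecker.h, the instantiation from the bottom of
// this file.
void exampleOptions() {
    storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>::Options options;
    options.initialGridResolution = 20;  // finer initial Freudenthal grid
    options.doRefinement = false;        // single over-/under-approximation pass, no refinement loop
}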
 
-            template<typename ValueType, typename RewardModelType>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) {
+
+            template<typename PomdpModelType, typename BeliefValueType>
+            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) {
+                // Intentionally left empty
+            }
+
+            template<typename PomdpModelType, typename BeliefValueType>
+            typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::diff(bool relative) const {
+                ValueType diff = upperBound - lowerBound;
+                if (diff < storm::utility::zero<ValueType>()) {
+                    // Only warn if the violation exceeds the tolerance; tiny negative gaps are numerical noise.
+                    STORM_LOG_WARN_COND(diff >= -1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << ".");
+                    diff = storm::utility::zero<ValueType>();
+                }
+                if (relative && !storm::utility::isZero(upperBound)) {
+                    diff /= upperBound;
+                }
+                return diff;
+            }
+
+            template<typename PomdpModelType, typename BeliefValueType>
+            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) {
                 // intentionally left empty
             }
 
-            template<typename ValueType, typename RewardModelType>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp, Options options) : pomdp(pomdp), options(options) {
+            template<typename PomdpModelType, typename BeliefValueType>
+            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) {
                 cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(this->options.numericPrecision), false);
             }
 
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<POMDPCheckResult<ValueType>> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::check(storm::logic::Formula const& formula) {
+            template<typename PomdpModelType, typename BeliefValueType>
+            typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::check(storm::logic::Formula const& formula) {
                 // Reset all collected statistics
                 statistics = Statistics();
-                std::unique_ptr<POMDPCheckResult<ValueType>> result;
                 // Extract the relevant information from the formula
                 auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula);
                 // Compute some initial bounds on the values for each state of the pomdp
                 auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
+                Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]);
 
-                if (formulaInfo.isNonNestedReachabilityProbability()) {
-                    // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
-                    STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
-                    if (!formulaInfo.getSinkStates().empty()) {
-                        auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
-                        reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
-                        STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported");
-                    }
-                    if (options.doRefinement) {
-                        result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
-                    } else {
-                        result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
-                    }
-                } else if (formulaInfo.isNonNestedExpectedRewardFormula()) {
+                boost::optional<std::string> rewardModelName;
+                if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) {
                     // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
                     STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
-                    if (options.doRefinement) {
-                        result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true);
+                    if (formulaInfo.isNonNestedReachabilityProbability()) {
+                        if (!formulaInfo.getSinkStates().empty()) {
+                            auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
+                            reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
+                            STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported");
+                        }
                     } else {
-                        // FIXME: pick the non-unique reward model here
-                        STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet.");
-                        result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
+                        // Expected reward formula!
+                        rewardModelName = formulaInfo.getRewardModelName();
                     }
                 } else {
                     STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
                 }
+
+                if (options.doRefinement) {
+                    refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
+                } else {
+                    computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
+                }
                 if (storm::utility::resources::isTerminate()) {
                    statistics.aborted = true;
                 }
                 return result;
             }
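// Editor's sketch (not part of the patch): semantics of the Result type returned
// by check() above. diff() is the absolute gap between the bounds, diff(true)
// the gap relative to the upper bound, both clamped at zero as implemented
// earlier in this file.
void exampleResult() {
    storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>::Result result(0.4, 0.5);
    double absoluteGap = result.diff();      // 0.5 - 0.4 = 0.1
    double relativeGap = result.diff(true);  // 0.1 / 0.5 = 0.2
    STORM_PRINT(absoluteGap << " / " << relativeGap << std::endl)
}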
[F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - return storm::api::extractFormulasFromProperties(propertyVector).front(); - } - - template - storm::modelchecker::CheckTask createStandardCheckTask(std::shared_ptr& property, std::vector&& hintVector) { - //Note: The property should not run out of scope after calling this because the task only stores the property by reference. - // Therefore, this method needs the property by reference (and not const reference) - auto task = storm::api::createTask(property, false); - if (!hintVector.empty()) { - auto hint = storm::modelchecker::ExplicitModelCheckerHint(); - hint.setResultHint(std::move(hintVector)); - auto hintPtr = std::make_shared>(hint); - task.setHint(hintPtr); - } - return task; - } + - template - std::unique_ptr> - ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, bool computeRewards) { - std::srand(time(NULL)); - // Compute easy upper and lower bounds - storm::utility::Stopwatch underlyingWatch(true); - // Compute the results on the underlying MDP as a basic overapproximation - storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling()); - // TODO: Is the following really necessary - underlyingMdpLabeling.addLabel("__goal__"); - std::vector goalStates; - for (auto const &targetObs : targetObservations) { - for (auto const &goalState : pomdp.getStatesWithObservation(targetObs)) { - underlyingMdpLabeling.addLabelToState("__goal__", goalState); - } - } - storm::models::sparse::Mdp underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels()); - auto underlyingModel = std::static_pointer_cast>( - std::make_shared>(underlyingMdp)); - std::string initPropString = computeRewards ? "R" : "P"; - initPropString += min ? "min" : "max"; - initPropString += "=? 
[F \"__goal__\"]"; - std::vector propVector = storm::api::parseProperties(initPropString); - std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); - STORM_PRINT("Underlying MDP" << std::endl) - if (computeRewards) { - underlyingMdp.addRewardModel("std", pomdp.getUniqueRewardModel()); + template + void ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { + + if (options.explorationThreshold > storm::utility::zero()) { + STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } - underlyingMdp.printModelInformationToStream(std::cout); - std::unique_ptr underlyingRes( - storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); - STORM_LOG_ASSERT(underlyingRes, "Result not exist."); - underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); - auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); - underlyingWatch.stop(); - - storm::utility::Stopwatch positionalWatch(true); - // we define some positional scheduler for the POMDP as a basic lower bound - storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); - for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { - auto obsStates = pomdp.getStatesWithObservation(obs); - // select a random action for all states with the same observation - uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front()); - for (auto const &state : obsStates) { - pomdpScheduler.setChoice(chosenAction, state); + + uint64_t underApproxSizeThreshold = 0; + { // Overapproximation + std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); + auto manager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + manager->setRewardModel(rewardModelName); + } + auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager); + if (approx) { + STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n"); + approx->getExploredMdp()->printModelInformationToStream(std::cout); + ValueType& resultValue = min ? result.lowerBound : result.upperBound; + resultValue = approx->getComputedValueAtInitialState(); + underApproxSizeThreshold = approx->getExploredMdp()->getNumberOfStates(); + } + } + { // Underapproximation (Uses a fresh Belief manager) + auto manager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + manager->setRewardModel(rewardModelName); + } + auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager); + if (approx) { + STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); + approx->getExploredMdp()->printModelInformationToStream(std::cout); + ValueType& resultValue = min ? 
+                        resultValue = approx->getComputedValueAtInitialState();
+                    }
+                }
-                auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false);
-                if (computeRewards) {
-                    underApproxModel->restrictRewardModels({"std"});
-                }
-                STORM_PRINT("Random Positional Scheduler" << std::endl)
-                underApproxModel->printModelInformationToStream(std::cout);
-                std::unique_ptr<storm::modelchecker::CheckResult> underapproxRes(
-                        storm::api::verifyWithSparseEngine<ValueType>(underApproxModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
-                STORM_LOG_ASSERT(underapproxRes, "Result not exist.");
-                underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true)));
-                auto initialUnderApproxMap = underapproxRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                positionalWatch.stop();
-
-                STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // "
-                                                       << initialUnderApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl)
-                STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl)
-
-                // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution.
-                // This can probably be improved (i.e. resolutions for single belief states)
-                STORM_PRINT("Initial Resolution: " << options.initialGridResolution << std::endl)
+            }
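// Editor's note (not part of the patch): the over-approximation bounds the
// optimal value from the optimistic side, so its value lands in
// result.upperBound when maximizing but in result.lowerBound when minimizing;
// the under-approximation fills the opposite slot. A sketch of the mapping used
// in the function above:
template<typename ValueType>
void assignApproximationValue(bool min, bool isOverApproximation, ValueType value, ValueType& lowerBound, ValueType& upperBound) {
    // over-approximation fills the lower bound iff minimizing; under-approximation fills the opposite slot
    ValueType& slot = (min == isOverApproximation) ? lowerBound : upperBound;
    slot = value;
}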
 
+            template<typename PomdpModelType, typename BeliefValueType>
+            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
+
+                // Set up exploration data
                 std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-                std::set<uint32_t> changedObservations;
-                uint64_t underApproxModelSize = 200;
-                uint64_t refinementCounter = 1;
-                STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl)
-                std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
-                                                                                                  {},
-                                                                                                  {}, underApproxModelSize);
-                if (res == nullptr) {
-                    statistics.refinementSteps = 0;
-                    return nullptr;
+                auto beliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
+                if (rewardModelName) {
+                    beliefManager->setRewardModel(rewardModelName);
+                }
+
+                // OverApproximation
+                auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager);
+                if (!overApproximation) {
+                    return;
                 }
-                ValueType lastMinScore = storm::utility::infinity<ValueType>();
-                while (refinementCounter < 1000 && ((!min && res->overApproxValue - res->underApproxValue > options.refinementPrecision) ||
-                                                    (min && res->underApproxValue - res->overApproxValue > options.refinementPrecision))) {
+                ValueType& overApproxValue = min ? result.lowerBound : result.upperBound;
+                overApproxValue = overApproximation->getComputedValueAtInitialState();
+
+                // UnderApproximation (TODO: use same belief manager?)
+                uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates();
+                auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager);
+                if (!underApproximation) {
+                    return;
+                }
+                ValueType& underApproxValue = min ? result.upperBound : result.lowerBound;
+                underApproxValue = underApproximation->getComputedValueAtInitialState();
+
+                // ValueType lastMinScore = storm::utility::infinity<ValueType>();
+                // Start refinement
+                statistics.refinementSteps = 0;
+                while (result.diff() > options.refinementPrecision) {
                     if (storm::utility::resources::isTerminate()) {
                         break;
                     }
                     // TODO the actual refinement
+                    /*
                     // choose which observation(s) to refine
                     std::vector<ValueType> obsAccumulator(pomdp.getNrObservations(), storm::utility::zero<ValueType>());
                     std::vector<uint64_t> beliefCount(pomdp.getNrObservations(), 0);
@@ -286,9 +266,9 @@ namespace storm {
                     }
 
-                    /*for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
-                        obsAccumulator[i] /= storm::utility::convertNumber<ValueType>(beliefCount[i]);
-                    }*/
+                    //for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
+                    //    obsAccumulator[i] /= storm::utility::convertNumber<ValueType>(beliefCount[i]);
+                    //}
                     changedObservations.clear();
 
                     //TODO think about some other scoring methods
@@ -301,21 +281,21 @@ namespace storm {
                             observationResolutionVector[i] = maxRes + 1;
                             changedObservations.insert(i);
                         }
-                    /*} else {
-                        lastMinScore = std::min(maxAvgDifference, lastMinScore);
-                        STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
-                        STORM_PRINT("Last Min Score: " << lastMinScore << std::endl)
-                        //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl)
-                        for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
+                    //} else {
+                    //    lastMinScore = std::min(maxAvgDifference, lastMinScore);
+                    //    STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
+                    //    STORM_PRINT("Last Min Score: " << lastMinScore << std::endl)
+                    //    //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl)
+                    //    for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
                             //STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i])
-                            if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) {
+                    //        if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) {
                                 //STORM_PRINT(" *** ")
-                                observationResolutionVector[i] += 1;
-                                changedObservations.insert(i);
-                            }
+                    //            observationResolutionVector[i] += 1;
+                    //            changedObservations.insert(i);
+                    //        }
                             //STORM_PRINT(std::endl)
-                        }
-                    }*/
+                    //    }
+                    //}
                     if (underApproxModelSize < std::numeric_limits<uint64_t>::max() - 101) {
                         underApproxModelSize += 100;
                     }
@@ -327,60 +307,13 @@ namespace storm {
                     STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) || cc.isEqual(res->underApproxValue, res->overApproxValue),
                                          "The value for the under-approximation is larger than the value for the over-approximation.");
-                    ++refinementCounter;
-                }
-                statistics.refinementSteps = refinementCounter;
-                if (min) {
-                    return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{res->underApproxValue, res->overApproxValue});
-                } else {
-                    return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{res->overApproxValue, res->underApproxValue});
-                }
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<POMDPCheckResult<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
-                                                                                             bool computeRewards,
-                                                                                             std::vector<ValueType> const& lowerPomdpValueBounds,
-                                                                                             std::vector<ValueType> const& upperPomdpValueBounds,
-                                                                                             uint64_t maxUaModelSize) {
-                STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
-                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-                auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, lowerPomdpValueBounds,
-                                                         upperPomdpValueBounds, maxUaModelSize);
-                if (result == nullptr) {
-                    return nullptr;
-                }
-                if (min) {
-                    return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->underApproxValue, result->overApproxValue});
-                } else {
-                    return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->overApproxValue, result->underApproxValue});
+                    */
+                    ++statistics.refinementSteps.get();
                 }
             }
 
-            template<typename ValueType, typename BeliefType, typename SummandsType>
-            ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
-                ValueType result = storm::utility::zero<ValueType>();
-                for (auto const& entry : belief) {
-                    result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
-                }
-                return result;
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            std::shared_ptr<RefinementComponents<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
-                                                                                                 std::vector<uint64_t> &observationResolutionVector,
-                                                                                                 bool computeRewards,
-                                                                                                 std::vector<ValueType> const& lowerPomdpValueBounds,
-                                                                                                 std::vector<ValueType> const& upperPomdpValueBounds,
-                                                                                                 uint64_t maxUaModelSize) {
-
-                auto beliefManager = std::make_shared<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>>(pomdp, options.numericPrecision);
-                if (computeRewards) {
-                    beliefManager->setRewardModel(); // TODO: get actual name
-                }
-
+            template<typename PomdpModelType, typename BeliefValueType>
+            std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager) {
                 statistics.overApproximationBuildTime.start();
                 storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
                 if (computeRewards) {
@@ -390,9 +323,6 @@ namespace storm {
                 }
 
                 // Expand the beliefs to generate the grid on-the-fly
-                if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
-                    STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
-                }
                 while (explorer.hasUnexploredState()) {
                     uint64_t currId = explorer.exploreNextState();
@@ -445,39 +375,20 @@ namespace storm {
                 explorer.finishExploration();
                 statistics.overApproximationBuildTime.stop();
-                STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
-                explorer.getExploredMdp()->printModelInformationToStream(std::cout);
 
                 statistics.overApproximationCheckTime.start();
                 explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                 statistics.overApproximationCheckTime.stop();
-
-                STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl);
-                STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
-
-                //auto underApprox = weightedSumUnderMap[initialBelief.id];
-                auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize, lowerPomdpValueBounds, upperPomdpValueBounds);
-                if (storm::utility::resources::isTerminate() && !underApproxComponents) {
-                    // TODO: return other components needed for refinement.
-                    //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
-                    //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
-                }
-                STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
-                /* TODO: return other components needed for refinement.
-                return std::make_unique<RefinementComponents<ValueType>>(
-                        RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
-                                                        underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
-                                                        underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
-                */
-                return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {},
-                                                                                                         underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
-
+                return std::make_shared<ExplorerType>(std::move(explorer));
             }
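// Editor's sketch (not part of the patch): the still-commented refinement step
// below is meant to score observations and bump their grid resolution. In its
// simplest form (as in the removed code), every observation selected for
// refinement jumps to one more than the current maximum resolution. Assuming
// the usual standard headers (<algorithm>, <vector>, <set>):
void exampleBumpResolutions(std::vector<uint64_t>& observationResolutionVector, std::set<uint32_t> const& observationsToRefine) {
    uint64_t maxRes = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
    for (auto const& obs : observationsToRefine) {
        observationResolutionVector[obs] = maxRes + 1;  // finer triangulation grid for this observation
    }
}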
 
-            template<typename ValueType, typename RewardModelType>
+            template<typename PomdpModelType, typename BeliefValueType>
+            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
+                /*TODO:
+            template<typename ValueType, typename RewardModelType>
             std::shared_ptr<RefinementComponents<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
+            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
                                                                                             std::vector<uint64_t> &observationResolutionVector,
                                                                                             bool computeRewards,
                                                                                             std::shared_ptr<RefinementComponents<ValueType>> refinementComponents,
@@ -504,7 +415,7 @@ namespace storm {
                 uint64_t nextBeliefId = refinementComponents->beliefList.size();
                 uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates();
-                std::set<uint64_t> relevantStates;
+                std::set<uint64_t> relevantStates; // The MDP states where the observation has changed
                 for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) {
                     auto currentBelief = refinementComponents->beliefList[iter.first];
                     if (changedObservations.find(currentBelief.observation) != changedObservations.end()) {
@@ -512,7 +423,7 @@ namespace storm {
                 }
 
-                std::set<std::pair<uint64_t, uint64_t>> statesAndActionsToCheck;
+                std::set<std::pair<uint64_t, uint64_t>> statesAndActionsToCheck; // The predecessors of states where the observation has changed
                 for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) {
                     for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) {
                         for (typename storm::storage::SparseMatrix<ValueType>::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(
@@ -536,6 +447,7 @@ namespace storm {
                                                                                                                                       action);
                         std::map<uint64_t, ValueType> transitionInActionBelief;
                         for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
+                            // Expand and triangulate the successor
                            uint32_t observation = iter->first;
                            uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->beliefList[currId], action,
                                                                                       observation, nextBeliefId);
@@ -803,160 +715,12 @@ namespace storm {
                                                                         refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, underApproxComponents->underApproxBeliefStateMap,
                                                                         refinementComponents->initialBeliefId});
             }
-
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<POMDPCheckResult<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min) {
-                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-                // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<POMDPCheckResult<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min) {
-                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-                // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
+                */
             }
-
-
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<UnderApproxComponents<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
-                                                                                                std::vector<bool> &beliefIsTarget,
-                                                                                                std::set<uint32_t> const &targetObservations,
-                                                                                                uint64_t initialBeliefId, bool min,
-                                                                                                bool computeRewards, uint64_t maxModelSize) {
-                std::set<uint64_t> visitedBelieves;
-                std::deque<uint64_t> beliefsToBeExpanded;
-                bsmap_type beliefStateMap;
-                std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions = {{{{0, storm::utility::one<ValueType>()}}},
-                                                                                       {{{1, storm::utility::one<ValueType>()}}}};
-                std::vector<uint64_t> targetStates = {1};
-
-                uint64_t stateId = 2;
-                beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, stateId));
-                ++stateId;
-                uint64_t nextId = beliefList.size();
-                uint64_t counter = 0;
-                statistics.underApproximationBuildTime.start();
-                // Expand the believes
-                visitedBelieves.insert(initialBeliefId);
-                beliefsToBeExpanded.push_back(initialBeliefId);
-                while (!beliefsToBeExpanded.empty()) {
-                    //TODO think of other ways to stop exploration besides model size
-                    auto currentBeliefId = beliefsToBeExpanded.front();
-                    uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front());
-                    // for targets, we only consider one action with one transition
-                    if (beliefIsTarget[currentBeliefId]) {
-                        // add a self-loop to target states
-                        targetStates.push_back(beliefStateMap.left.at(currentBeliefId));
-                        transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one<ValueType>()}}});
-                    } else if (counter > maxModelSize) {
-                        transitions.push_back({{{0, storm::utility::one<ValueType>()}}});
-                    } else {
-                        // Iterate over all actions and add the corresponding transitions
-                        std::vector<std::map<uint64_t, ValueType>> actionTransitionStorage;
-                        //TODO add a way to extract the actions from the over-approx and use them here?
-                        for (uint64_t action = 0; action < numChoices; ++action) {
-                            std::map<uint64_t, ValueType> transitionsInStateWithAction;
-                            std::map<uint32_t, ValueType> observationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currentBeliefId], action);
-                            for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) {
-                                uint32_t observation = iter->first;
-                                uint64_t nextBeliefId = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId],
-                                                                                           action,
-                                                                                           observation, nextId);
-                                nextId = beliefList.size();
-                                if (visitedBelieves.insert(nextBeliefId).second) {
-                                    beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId));
-                                    ++stateId;
-                                    beliefsToBeExpanded.push_back(nextBeliefId);
-                                    ++counter;
-                                }
-                                transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second;
-                            }
-                            actionTransitionStorage.push_back(transitionsInStateWithAction);
-                        }
-                        transitions.push_back(actionTransitionStorage);
-                    }
-                    beliefsToBeExpanded.pop_front();
-                    if (storm::utility::resources::isTerminate()) {
-                        statistics.underApproximationBuildAborted = true;
-                        break;
-                    }
-                }
-                statistics.underApproximationStates = transitions.size();
-                if (storm::utility::resources::isTerminate()) {
-                    statistics.underApproximationBuildTime.stop();
-                    return nullptr;
-                }
+            template<typename PomdpModelType, typename BeliefValueType>
+            std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager) {
-                storm::models::sparse::StateLabeling labeling(transitions.size());
-                labeling.addLabel("init");
-                labeling.addLabel("target");
-                labeling.addLabelToState("init", 0);
-                for (auto targetState : targetStates) {
-                    labeling.addLabelToState("target", targetState);
-                }
-
-                std::shared_ptr<storm::models::sparse::Model<ValueType, RewardModelType>> model;
-                auto transitionMatrix = buildTransitionMatrix(transitions);
-                if (transitionMatrix.getRowCount() == transitionMatrix.getRowGroupCount()) {
-                    transitionMatrix.makeRowGroupingTrivial();
-                }
-                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(transitionMatrix, labeling);
-                storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(modelComponents);
-                if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> rewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
-                    for (auto const &iter : beliefStateMap.left) {
-                        auto currentBelief = beliefList[iter.first];
-                        auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
-                        for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) {
-                            // Add the reward
-                            rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
-                                                             getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
-                                                                                  currentBelief));
-                        }
-                    }
-                    underApproxMdp.addRewardModel("std", rewardModel);
-                    underApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
-                }
-                model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underApproxMdp);
-
-                model->printModelInformationToStream(std::cout);
-                statistics.underApproximationBuildTime.stop();
-
-                std::string propertyString;
-                if (computeRewards) {
-                    propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
-                } else {
-                    propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
[F \"target\"]"; - } - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - - statistics.underApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, false))); - statistics.underApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return nullptr; - } - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true))); - auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; - - return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeUnderapproximation(std::shared_ptr>> beliefManager, - std::set const &targetObservations, bool min, - bool computeRewards, uint64_t maxModelSize, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds) { - // Build the belief MDP until enough states are explored. - //TODO think of other ways to stop exploration besides model size - statistics.underApproximationBuildTime.start(); storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); if (computeRewards) { @@ -981,7 +745,7 @@ namespace storm { if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { stopExploration = true; explorer.setCurrentStateIsTruncated(); - } else if (explorer.getCurrentNumberOfMdpStates() >= maxModelSize) { + } else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) { stopExploration = true; explorer.setCurrentStateIsTruncated(); } @@ -1024,249 +788,22 @@ namespace storm { explorer.finishExploration(); statistics.underApproximationBuildTime.stop(); - STORM_PRINT("Under Approximation MDP build took " << statistics.underApproximationBuildTime << " seconds." << std::endl); - explorer.getExploredMdp()->printModelInformationToStream(std::cout); statistics.underApproximationCheckTime.start(); explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.underApproximationCheckTime.stop(); - STORM_PRINT("Time Underapproximation: " << statistics.underApproximationCheckTime << " seconds." 
-                STORM_PRINT("Under-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
-
-                return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{explorer.getComputedValueAtInitialState(), {}, {}});
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            storm::storage::SparseMatrix<ValueType>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> &transitions) {
-                uint_fast64_t currentRow = 0;
-                uint_fast64_t currentRowGroup = 0;
-                uint64_t nrColumns = transitions.size();
-                uint64_t nrRows = 0;
-                uint64_t nrEntries = 0;
-                for (auto const &actionTransitions : transitions) {
-                    for (auto const &map : actionTransitions) {
-                        nrEntries += map.size();
-                        ++nrRows;
-                    }
-                }
-                storm::storage::SparseMatrixBuilder<ValueType> smb(nrRows, nrColumns, nrEntries, true, true);
-                for (auto const &actionTransitions : transitions) {
-                    smb.newRowGroup(currentRow);
-                    for (auto const &map : actionTransitions) {
-                        for (auto const &transition : map) {
-                            smb.addNextValue(currentRow, transition.first, transition.second);
-                        }
-                        ++currentRow;
-                    }
-                    ++currentRowGroup;
-                }
-                return smb.build();
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefIdInVector(
-                    std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
-                    std::map<uint64_t, ValueType> &probabilities) {
-                // TODO This one is quite slow
-                for (auto const &belief : grid) {
-                    if (belief.observation == observation) {
-                        bool same = true;
-                        for (auto const &probEntry : belief.probabilities) {
-                            if (probabilities.find(probEntry.first) == probabilities.end()) {
-                                same = false;
-                                break;
-                            }
-                            if (!cc.isEqual(probEntry.second, probabilities[probEntry.first])) {
-                                same = false;
-                                break;
-                            }
-                        }
-                        if (same) {
-                            return belief.id;
-                        }
-                    }
-                }
-                return -1;
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            storm::pomdp::Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getInitialBelief(uint64_t id) {
-                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
-                                 "POMDP contains more than one initial state");
-                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
-                                 "POMDP does not contain an initial state");
-                std::map<uint64_t, ValueType> distribution;
-                uint32_t observation = 0;
-                for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
-                    if (pomdp.getInitialStates()[state] == 1) {
-                        distribution[state] = storm::utility::one<ValueType>();
-                        observation = pomdp.getObservation(state);
-                        break;
-                    }
-                }
-                return storm::pomdp::Belief<ValueType>{id, observation, distribution};
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeSubSimplexAndLambdas(
-                    std::map<uint64_t, ValueType> &probabilities, uint64_t resolution, uint64_t nrStates) {
-
-                //TODO this can also be simplified using the sparse vector interpretation
-
-                // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
-                // Variable names are based on the paper
-                std::vector<ValueType> x(nrStates);
-                std::vector<ValueType> v(nrStates);
-                std::vector<ValueType> d(nrStates);
-                auto convResolution = storm::utility::convertNumber<ValueType>(resolution);
-
-                for (size_t i = 0; i < nrStates; ++i) {
-                    for (auto const &probEntry : probabilities) {
-                        if (probEntry.first >= i) {
-                            x[i] += convResolution * probEntry.second;
-                        }
-                    }
-                    v[i] = storm::utility::floor(x[i]);
-                    d[i] = x[i] - v[i];
-                }
-
-                auto p = storm::utility::vector::getSortedIndices(d);
-
-                std::vector<std::vector<ValueType>> qs(nrStates, std::vector<ValueType>(nrStates));
-                for (size_t i = 0; i < nrStates; ++i) {
-                    if (i == 0) {
-                        for (size_t j = 0; j < nrStates; ++j) {
-                            qs[i][j] = v[j];
-                        }
-                    } else {
-                        for (size_t j = 0; j < nrStates; ++j) {
-                            if (j == p[i - 1]) {
-                                qs[i][j] = qs[i - 1][j] + storm::utility::one<ValueType>();
-                            } else {
-                                qs[i][j] = qs[i - 1][j];
-                            }
-                        }
-                    }
-                }
-                std::vector<std::vector<ValueType>> subSimplex(nrStates);
-                for (size_t j = 0; j < nrStates; ++j) {
-                    for (size_t i = 0; i < nrStates - 1; ++i) {
-                        if (cc.isLess(storm::utility::zero<ValueType>(), qs[j][i] - qs[j][i + 1])) {
-                            subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution;
-                        }
-                    }
-
-                    if (cc.isLess(storm::utility::zero<ValueType>(), qs[j][nrStates - 1])) {
-                        subSimplex[j][nrStates - 1] = qs[j][nrStates - 1] / convResolution;
-                    }
-                }
-
-                std::vector<ValueType> lambdas(nrStates, storm::utility::zero<ValueType>());
-                auto sum = storm::utility::zero<ValueType>();
-                for (size_t i = 1; i < nrStates; ++i) {
-                    lambdas[i] = d[p[i - 1]] - d[p[i]];
-                    sum += d[p[i - 1]] - d[p[i]];
-                }
-                lambdas[0] = storm::utility::one<ValueType>() - sum;
-
-                return std::make_pair(subSimplex, lambdas);
-            }
-
-
-            template<typename ValueType, typename RewardModelType>
-            std::map<uint32_t, ValueType>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeObservationProbabilitiesAfterAction(
-                    storm::pomdp::Belief<ValueType> &belief,
-                    uint64_t actionIndex) {
-                std::map<uint32_t, ValueType> res;
-                // the id is not important here as we immediately discard the belief (very hacky, I don't like it either)
-                std::map<uint64_t, ValueType> postProbabilities;
-                for (auto const &probEntry : belief.probabilities) {
-                    uint64_t state = probEntry.first;
-                    auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
-                    for (auto const &entry : row) {
-                        if (entry.getValue() > 0) {
-                            postProbabilities[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
-                        }
-                    }
-                }
-                for (auto const &probEntry : postProbabilities) {
-                    uint32_t observation = pomdp.getObservation(probEntry.first);
-                    if (res.count(observation) == 0) {
-                        res[observation] = probEntry.second;
-                    } else {
-                        res[observation] += probEntry.second;
-                    }
-                }
-
-                return res;
+                return std::make_shared<ExplorerType>(std::move(explorer));
             }
 
-            template<typename ValueType, typename RewardModelType>
-            uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterActionAndObservation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
-                                                                                                                  std::vector<bool> &beliefIsTarget, std::set<uint32_t> const &targetObservations, storm::pomdp::Belief<ValueType> &belief, uint64_t actionIndex,
-                                                                                                                  uint32_t observation, uint64_t id) {
-                std::map<uint64_t, ValueType> distributionAfter;
-                for (auto const &probEntry : belief.probabilities) {
-                    uint64_t state = probEntry.first;
-                    auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
-                    for (auto const &entry : row) {
-                        if (pomdp.getObservation(entry.getColumn()) == observation) {
-                            distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
-                        }
-                    }
-                }
-                // We have to normalize the distribution
-                auto sum = storm::utility::zero<ValueType>();
-                for (auto const &entry : distributionAfter) {
-                    sum += entry.second;
-                }
-
-                for (auto const &entry : distributionAfter) {
-                    distributionAfter[entry.first] /= sum;
-                }
-                if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) {
-                    auto res = getBeliefIdInVector(beliefList, observation, distributionAfter);
-                    return res;
-                } else {
-                    beliefList.push_back(storm::pomdp::Belief<ValueType>{id, observation, distributionAfter});
-                    beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
-                    return id;
-                }
+            template<typename PomdpModelType, typename BeliefValueType>
+            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
+                // TODO
             }
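// Editor's note (not part of the patch): the removed helpers above implement the
// standard POMDP belief update. For belief b, action a, and observation z, the
// successor belief is
//
//     b'(s') = ( sum_s b(s) * P(s' | s, a) * [obs(s') = z] ) / P(z | b, a),
//
// where the normalization constant P(z | b, a) is exactly what
// computeObservationProbabilitiesAfterAction computed per observation. This
// logic presumably now lives in the shared BeliefManager used by the new
// exploration code.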
 
-            template<typename ValueType, typename RewardModelType>
-            ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief) {
-                auto result = storm::utility::zero<ValueType>();
-                for (auto const &probEntry : belief) {
-                    result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
-                }
-                return result;
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief) {
-                auto result = storm::utility::zero<ValueType>();
-                for (auto const &probEntry : belief.probabilities) {
-                    result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
-                }
-                return result;
-            }
-
-
-            template
-            class ApproximatePOMDPModelchecker<double>;
-
-#ifdef STORM_HAVE_CARL
-
-            template
-            class ApproximatePOMDPModelchecker<storm::RationalNumber>;
+            template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>;
+            template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;
 
-#endif
         }
     }
}
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 925bff5b5..0d59ac31a 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -4,6 +4,7 @@
 #include "storm/utility/logging.h"
 #include "storm-pomdp/storage/Belief.h"
 #include "storm-pomdp/storage/BeliefManager.h"
+#include "storm-pomdp/builder/BeliefMdpExplorer.h"
 #include <boost/bimap.hpp>
 #include "storm/storage/jani/Property.h"
@@ -17,12 +18,6 @@
 namespace storm {
     namespace modelchecker {
         typedef boost::bimap<uint64_t, uint64_t> bsmap_type;
 
-        template<typename ValueType>
-        struct POMDPCheckResult {
-            ValueType overApproxValue;
-            ValueType underApproxValue;
-        };
-
         /**
          * Struct containing information which is supposed to be persistent over multiple refinement steps
         *
@@ -49,9 +44,13 @@
            bsmap_type underApproxBeliefStateMap;
        };
 
-        template<typename ValueType, typename RewardModelType = storm::models::sparse::StandardRewardModel<ValueType>>
+        template<typename PomdpModelType, typename BeliefValueType = typename PomdpModelType::ValueType>
        class ApproximatePOMDPModelchecker {
        public:
+            typedef typename PomdpModelType::ValueType ValueType;
+            typedef typename PomdpModelType::RewardModelType RewardModelType;
+            typedef storm::storage::BeliefManager<PomdpModelType, BeliefValueType> BeliefManagerType;
+            typedef storm::builder::BeliefMdpExplorer<PomdpModelType, BeliefValueType> ExplorerType;
 
            struct Options {
                Options();
@@ -63,85 +62,60 @@
                bool cacheSubsimplices; /// Enables caching of subsimplices
            };
 
-            ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp, Options options = Options());
+            struct Result {
+                Result(ValueType lower, ValueType upper);
+                ValueType lowerBound;
+                ValueType upperBound;
+                ValueType diff(bool relative = false) const;
+            };
+
+            ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options = Options());
 
-            std::unique_ptr<POMDPCheckResult<ValueType>> check(storm::logic::Formula const& formula);
+            Result check(storm::logic::Formula const& formula);
 
            void printStatisticsToStream(std::ostream& stream) const;
 
        private:
            /**
-             * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop
+             * Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size
             *
-             * @param targetObservations the set of observations to be reached
-             * @param min true if minimum probability is to be computed
-             * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
+             * @param targetObservations set of target observations
+             * @param min true if minimum value is to be computed
+             * @param rewardModelName the name of the reward model to be used (none if probabilities are computed)
+             * @param lowerPomdpValueBounds lower value bounds for each state of the POMDP
+             * @param upperPomdpValueBounds upper value bounds for each state of the POMDP
+             * @param result lower and upper bounds on the value of the initial state, updated by this method
             */
-            std::unique_ptr<POMDPCheckResult<ValueType>>
-            refineReachability(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards);
-
+            void computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result);
+
            /**
-             * Compute the reachability probability of given target observations on a POMDP for the given resolution only.
-             * On-the-fly state space generation is used for the overapproximation
+             * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop
             *
             * @param targetObservations the set of observations to be reached
             * @param min true if minimum probability is to be computed
-             * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
+             * @param result lower and upper bounds on the value of the initial state, tightened in each refinement step
             */
-            std::unique_ptr<POMDPCheckResult<ValueType>>
-            computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min);
+            void refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result);
 
            /**
-             * Compute the reachability rewards for given target observations on a POMDP for the given resolution only.
-             * On-the-fly state space generation is used for the overapproximation
-             *
-             * @param targetObservations the set of observations to be reached
-             * @param min true if minimum rewards are to be computed
-             * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
+             * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties
             */
-            std::unique_ptr<POMDPCheckResult<ValueType>>
-            computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min);
+            std::shared_ptr<ExplorerType> computeOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager);
+
+            void refineOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
 
-        private:
            /**
-             * Helper method to compute the inital step of the refinement loop
-             *
-             * @param targetObservations set of target observations
-             * @param min true if minimum value is to be computed
-             * @param observationResolutionVector vector containing the resolution to be used for each observation
-             * @param computeRewards true if rewards are to be computed, false if probability is computed
-             * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
-             * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
-             * @param maxUaModelSize the maximum size of the underapproximation model to be generated
-             * @return struct containing components generated during the computation to be used in later refinement iterations
+             * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties
             */
-            std::shared_ptr<RefinementComponents<ValueType>>
-            computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
-                                       bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
+            std::shared_ptr<ExplorerType> computeUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager);
 
-            std::shared_ptr<RefinementComponents<ValueType>>
-            computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
-                                  bool computeRewards, std::shared_ptr<RefinementComponents<ValueType>> refinementComponents,
-                                  std::set<uint32_t> changedObservations,
-                                  boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-                                  boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
+            void refineUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
 
-            /**
-             * Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size
-             *
-             * @param targetObservations set of target observations
-             * @param min true if minimum value is to be computed
-             * @param observationResolutionVector vector containing the resolution to be used for each observation
-             * @param computeRewards true if rewards are to be computed, false if probability is computed
-             * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
-             * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
-             * @param maxUaModelSize the maximum size of the underapproximation model to be generated
-             * @return A struct
-             *         containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
-             */
-            std::unique_ptr<POMDPCheckResult<ValueType>>
-            computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards,
-                                   std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
+#ifdef REMOVE_THIS
            /**
             * Helper to compute an underapproximation of the reachability property.
             * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states.
@@ -243,7 +217,8 @@
             */
            ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief);
            ValueType getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief);
-
+#endif //REMOVE_THIS
+
            struct Statistics {
                Statistics();
                boost::optional<uint64_t> refinementSteps;
@@ -262,7 +237,7 @@
            };
            Statistics statistics;
 
-            storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp;
+            PomdpModelType const& pomdp;
            Options options;
            storm::utility::ConstantsComparator<ValueType> cc;
        };
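// Editor's sketch (not part of the patch): end-to-end use of the refactored
// checker, assuming a double-valued POMDP and a parsed formula obtained
// elsewhere (e.g. via storm-parsers):
void exampleEndToEnd(storm::models::sparse::Pomdp<double> const& pomdp, storm::logic::Formula const& formula) {
    using Checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>;
    Checker::Options options;
    options.doRefinement = true;  // iterate until result.diff() drops below refinementPrecision (or abort)
    Checker checker(pomdp, options);
    auto result = checker.check(formula);
    checker.printStatisticsToStream(std::cout);
    std::cout << "value in [" << result.lowerBound << ", " << result.upperBound << "]" << std::endl;
}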