#include "ApproximatePOMDPModelchecker.h" #include #include #include "storm-pomdp/analysis/FormulaInformation.h" #include "storm/utility/ConstantsComparator.h" #include "storm/utility/NumberTraits.h" #include "storm/utility/graph.h" #include "storm/logic/Formulas.h" #include "storm/models/sparse/Dtmc.h" #include "storm/models/sparse/StandardRewardModel.h" #include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h" #include "storm/utility/vector.h" #include "storm/api/properties.h" #include "storm/api/export.h" #include "storm-pomdp/builder/BeliefMdpExplorer.h" #include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h" #include "storm/utility/macros.h" #include "storm/utility/SignalHandler.h" #include "storm/exceptions/NotSupportedException.h" namespace storm { namespace pomdp { namespace modelchecker { template ApproximatePOMDPModelchecker::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) { // Intentionally left empty } template typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::Result::diff(bool relative) const { ValueType diff = upperBound - lowerBound; if (diff < storm::utility::zero()) { STORM_LOG_WARN_COND(diff >= 1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << "."); diff = storm::utility::zero(); } if (relative && !storm::utility::isZero(upperBound)) { diff /= upperBound; } return diff; } template bool ApproximatePOMDPModelchecker::Result::updateLowerBound(ValueType const& value) { if (value > lowerBound) { lowerBound = value; return true; } return false; } template bool ApproximatePOMDPModelchecker::Result::updateUpperBound(ValueType const& value) { if (value < upperBound) { upperBound = value; return true; } return false; } template ApproximatePOMDPModelchecker::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { // intentionally left empty; } template ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) { cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(this->options.numericPrecision), false); } template typename ApproximatePOMDPModelchecker::Result ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { STORM_LOG_ASSERT(options.unfold || options.discretize, "Invoked belief exploration but no task (unfold or discretize) given."); // Reset all collected statistics statistics = Statistics(); statistics.totalTime.start(); // Extract the relevant information from the formula auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); // Compute some initial bounds on the values for each state of the pomdp auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker>(pomdp).getValueBounds(formula, formulaInfo); Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]); STORM_PRINT_AND_LOG("Initial value bounds are [" << result.lowerBound << ", " << result.upperBound << "]" << std::endl); boost::optional rewardModelName; if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. 
        STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
        if (formulaInfo.isNonNestedReachabilityProbability()) {
            if (!formulaInfo.getSinkStates().empty()) {
                auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
                reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
                STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported");
            }
        } else {
            // Expected reward formula!
            rewardModelName = formulaInfo.getRewardModelName();
        }
    } else {
        STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
    }

    if (options.refine) {
        refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
    } else {
        computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
    }
    // "clear" results in case they were actually not requested (this will make the output a bit more clear)
    if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) {
        result.lowerBound = -storm::utility::infinity<ValueType>();
    }
    if ((formulaInfo.maximize() && !options.discretize) || (formulaInfo.minimize() && !options.unfold)) {
        result.upperBound = storm::utility::infinity<ValueType>();
    }

    if (storm::utility::resources::isTerminate()) {
        statistics.aborted = true;
    }
    statistics.totalTime.stop();
    return result;
}
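/*!
 * Prints the statistics collected during the last check() call (model sizes, resolutions, exploration limits, and timings) to the given stream.
 */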
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::printStatisticsToStream(std::ostream& stream) const {
    stream << "##### Grid Approximation Statistics ######" << std::endl;
    stream << "# Input model: " << std::endl;
    pomdp.printModelInformationToStream(stream);
    stream << "# Max. Number of states with same observation: " << pomdp.getMaxNrStatesWithSameObservation() << std::endl;

    if (statistics.aborted) {
        stream << "# Computation aborted early" << std::endl;
    }

    stream << "# Total check time: " << statistics.totalTime << std::endl;
    // Refinement information:
    if (statistics.refinementSteps) {
        stream << "# Number of refinement steps: " << statistics.refinementSteps.get() << std::endl;
    }

    // The overapproximation MDP:
    if (statistics.overApproximationStates) {
        stream << "# Number of states in the ";
        if (options.refine) {
            stream << "final ";
        }
        stream << "grid MDP for the over-approximation: ";
        if (statistics.overApproximationBuildAborted) {
            stream << ">=";
        }
        stream << statistics.overApproximationStates.get() << std::endl;
        stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.get() << std::endl;
        stream << "# Time spent building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << std::endl;
        stream << "# Time spent checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << std::endl;
    }

    // The underapproximation MDP:
    if (statistics.underApproximationStates) {
        stream << "# Number of states in the ";
        if (options.refine) {
            stream << "final ";
        }
        stream << "grid MDP for the under-approximation: ";
        if (statistics.underApproximationBuildAborted) {
            stream << ">=";
        }
        stream << statistics.underApproximationStates.get() << std::endl;
        stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.get() << std::endl;
        stream << "# Time spent building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << std::endl;
        stream << "# Time spent checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << std::endl;
    }

    stream << "##########################################" << std::endl;
}
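/*!
 * Performs a single ("on the fly") belief exploration without refinement: depending on the options, a discretized
 * over-approximation and/or an unfolded under-approximation MDP is built and checked once, and the corresponding
 * bound of the given result is overwritten with the computed value at the initial belief.
 */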
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const& targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {

    if (options.discretize) {
        std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.resolutionInit);
        auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
        if (rewardModelName) {
            manager->setRewardModel(rewardModelName);
        }
        auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
        HeuristicParameters heuristicParameters;
        heuristicParameters.gapThreshold = options.gapThresholdInit;
        heuristicParameters.observationThreshold = options.obsThresholdInit; // Actually not relevant without refinement
        heuristicParameters.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit;
        heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;

        buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, observationResolutionVector, manager, approx);
        if (approx->hasComputedValues()) {
            STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n");
            approx->getExploredMdp()->printModelInformationToStream(std::cout);
            ValueType& resultValue = min ? result.lowerBound : result.upperBound;
            resultValue = approx->getComputedValueAtInitialState();
        }
    }
    if (options.unfold) { // Underapproximation (uses a fresh Belief manager)
        auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
        if (rewardModelName) {
            manager->setRewardModel(rewardModelName);
        }
        auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
        HeuristicParameters heuristicParameters;
        heuristicParameters.gapThreshold = options.gapThresholdInit;
        heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
        heuristicParameters.sizeThreshold = options.sizeThresholdInit;
        if (heuristicParameters.sizeThreshold == 0) {
            if (options.explorationTimeLimit) {
                heuristicParameters.sizeThreshold = std::numeric_limits<uint64_t>::max();
            } else {
                heuristicParameters.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation();
                STORM_PRINT_AND_LOG("Heuristically selected an under-approximation mdp size threshold of " << heuristicParameters.sizeThreshold << "." << std::endl);
            }
        }
        buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, manager, approx);
        if (approx->hasComputedValues()) {
            STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n");
            approx->getExploredMdp()->printModelInformationToStream(std::cout);
            ValueType& resultValue = min ? result.upperBound : result.lowerBound;
            resultValue = approx->getComputedValueAtInitialState();
        }
    }
}
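/*!
 * Iteratively refines the over- and/or under-approximation until the refinement step limit, the requested precision,
 * a fixpoint, or a resource limit is reached. The given result is tightened whenever a newly computed value improves a bound.
 */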
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const& targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
    statistics.refinementSteps = 0;

    // Set up exploration data
    std::vector<uint64_t> observationResolutionVector;
    std::shared_ptr<BeliefManagerType> overApproxBeliefManager;
    std::shared_ptr<ExplorerType> overApproximation;
    HeuristicParameters overApproxHeuristicPar;
    if (options.discretize) { // Setup and build first OverApproximation
        observationResolutionVector = std::vector<uint64_t>(pomdp.getNrObservations(), options.resolutionInit);
        overApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
        if (rewardModelName) {
            overApproxBeliefManager->setRewardModel(rewardModelName);
        }
        overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
        overApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
        overApproxHeuristicPar.observationThreshold = options.obsThresholdInit;
        overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit;
        overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
        buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation);
        if (!overApproximation->hasComputedValues()) {
            return;
        }
        ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
        bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
        if (betterBound) {
            STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl);
        }
    }

    std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
    std::shared_ptr<ExplorerType> underApproximation;
    HeuristicParameters underApproxHeuristicPar;
    if (options.unfold) { // Setup and build first UnderApproximation
        underApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
        if (rewardModelName) {
            underApproxBeliefManager->setRewardModel(rewardModelName);
        }
        underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
        underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
        underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
        underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;
        if (underApproxHeuristicPar.sizeThreshold == 0) {
            // Select a decent value automatically
            underApproxHeuristicPar.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation();
        }
        buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, underApproxHeuristicPar, underApproxBeliefManager, underApproximation);
        if (!underApproximation->hasComputedValues()) {
            return;
        }
        ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
        bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
        if (betterBound) {
            STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl);
        }
    }

    // Do some output
    STORM_PRINT_AND_LOG("Completed iteration #" << statistics.refinementSteps.get() << ". Current checktime is " << statistics.totalTime << ".");
    bool computingLowerBound = false;
    bool computingUpperBound = false;
    if (options.discretize) {
        STORM_PRINT_AND_LOG(" Over-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
        (min ? computingLowerBound : computingUpperBound) = true;
    }
    if (options.unfold) {
        STORM_PRINT_AND_LOG(" Under-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
        (min ? computingUpperBound : computingLowerBound) = true;
    }
    if (computingLowerBound && computingUpperBound) {
        STORM_PRINT_AND_LOG(" Current result is [" << result.lowerBound << ", " << result.upperBound << "].");
    } else if (computingLowerBound) {
        STORM_PRINT_AND_LOG(" Current result is ≥" << result.lowerBound << ".");
    } else if (computingUpperBound) {
        STORM_PRINT_AND_LOG(" Current result is ≤" << result.upperBound << ".");
    }
    STORM_PRINT_AND_LOG(std::endl);

    // Start refinement
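    // In each refinement step, the over-approximation is refined by increasing the grid resolution of selected
    // observations and the under-approximation is refined by exploring additional belief states. The loop below stops
    // as soon as the step limit or the requested precision is reached, a fixpoint is detected, or the computation is aborted.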
    STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || !storm::utility::isZero(options.refinePrecision), "No termination criterion for refinement given. Consider specifying a step limit, a non-zero precision limit, or a timeout.");
    STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined.");
    while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps.get() < options.refineStepLimit.get()) && result.diff() > options.refinePrecision) {
        bool overApproxFixPoint = true;
        bool underApproxFixPoint = true;
        if (options.discretize) {
            // Refine over-approximation
            if (min) {
                overApproximation->takeCurrentValuesAsLowerBounds();
            } else {
                overApproximation->takeCurrentValuesAsUpperBounds();
            }
            overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
            overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t>(storm::utility::convertNumber<ValueType>(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor);
            overApproxHeuristicPar.observationThreshold += options.obsThresholdIncrementFactor * (storm::utility::one<ValueType>() - overApproxHeuristicPar.observationThreshold);
            overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
            overApproxFixPoint = buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation);
            if (overApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
                bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
                if (betterBound) {
                    STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " in refinement step #" << (statistics.refinementSteps.get() + 1) << ". New value is '" << newValue << "'." << std::endl);
                }
            } else {
                break;
            }
        }

        if (options.unfold && result.diff() > options.refinePrecision) {
            // Refine under-approximation
            underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
            underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t>(storm::utility::convertNumber<ValueType>(underApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor);
            underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
            underApproxFixPoint = buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation);
            if (underApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
                bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
                if (betterBound) {
                    STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " in refinement step #" << (statistics.refinementSteps.get() + 1) << ". New value is '" << newValue << "'." << std::endl);
                }
            } else {
                break;
            }
        }

        if (storm::utility::resources::isTerminate()) {
            break;
        } else {
            ++statistics.refinementSteps.get();
            // Don't make too many outputs (to avoid logfile clutter)
            if (statistics.refinementSteps.get() <= 1000) {
Current checktime is " << statistics.totalTime << "."); bool computingLowerBound = false; bool computingUpperBound = false; if (options.discretize) { STORM_PRINT_AND_LOG(" Over-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << "."); (min ? computingLowerBound : computingUpperBound) = true; } if (options.unfold) { STORM_PRINT_AND_LOG(" Under-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << "."); (min ? computingUpperBound : computingLowerBound) = true; } if (computingLowerBound && computingUpperBound) { STORM_PRINT_AND_LOG(" Current result is [" << result.lowerBound << ", " << result.upperBound << "]."); } else if (computingLowerBound) { STORM_PRINT_AND_LOG(" Current result is ≥" << result.lowerBound << "."); } else if (computingUpperBound) { STORM_PRINT_AND_LOG(" Current result is ≤" << result.upperBound << "."); } STORM_PRINT_AND_LOG(std::endl); STORM_LOG_WARN_COND(statistics.refinementSteps.get() < 1000, "Refinement requires more than 1000 iterations."); } } if (overApproxFixPoint && underApproxFixPoint) { STORM_PRINT_AND_LOG("Refinement fixpoint reached after " << statistics.refinementSteps.get() << " iterations." << std::endl); break; } } } /*! * Heuristically rates the quality of the approximation described by the given successor observation info. * Here, 0 means a bad approximation and 1 means a good approximation. */ template typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) { auto n = storm::utility::convertNumber(info.support.size()); auto one = storm::utility::one(); if (storm::utility::isOne(n)) { // If the belief is Dirac, it has to be approximated precisely. // In this case, we return the best possible rating return one; } else { // Create the rating for this observation at this choice from the given info ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 // Normalize the rating so that it ranges from 0 to 1, where // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. obsChoiceRating = (obsChoiceRating * n - one) / (n - one); // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined) obsChoiceRating *= storm::utility::convertNumber(observationResolution) / storm::utility::convertNumber(maxResolution); return obsChoiceRating; } } template std::vector::ValueType> ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector, uint64_t const& maxResolution) { uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates(); auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices(); std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); std::map gatheredSuccessorObservations; // Declare here to avoid reallocations for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) { // Check whether this state is reached under an optimal scheduler. // The heuristic assumes that the remaining states are not relevant for the observation score. 
template<typename PomdpModelType, typename BeliefValueType>
std::vector<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<uint64_t> const& observationResolutionVector, uint64_t const& maxResolution) {
    uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates();
    auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices();

    std::vector<ValueType> resultingRatings(pomdp.getNrObservations(), storm::utility::one<ValueType>());

    std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
    for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) {
        // Check whether this state is reached under an optimal scheduler.
        // The heuristic assumes that the remaining states are not relevant for the observation score.
        if (overApproximation->stateIsOptimalSchedulerReachable(mdpState)) {
            for (uint64_t mdpChoice = choiceIndices[mdpState]; mdpChoice < choiceIndices[mdpState + 1]; ++mdpChoice) {
                // Similarly, only optimal actions are relevant
                if (overApproximation->actionIsOptimal(mdpChoice)) {
                    // score the observations for this choice
                    gatheredSuccessorObservations.clear();
                    overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
                    for (auto const& obsInfo : gatheredSuccessorObservations) {
                        auto const& obs = obsInfo.first;
                        ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);

                        // The rating of the observation will be the minimum over all choice-based observation ratings
                        resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
                    }
                }
            }
        }
    }
    return resultingRatings;
}
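/*!
 * Builds (or, if refine is true, rebuilds) and checks the discretized belief MDP that yields an "optimistic" bound on the
 * optimal value (a lower bound when minimizing, an upper bound when maximizing). Beliefs are triangulated over a grid
 * whose per-observation resolution is given by observationResolutionVector; beliefs that are not explored further are
 * truncated using the known value bounds. Returns true if (heuristically) a refinement fixpoint was detected, i.e.
 * further refinement of this approximation is not expected to change the result.
 */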
template<typename PomdpModelType, typename BeliefValueType>
bool ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
    // Detect whether the refinement reached a fixpoint.
    bool fixPoint = true;

    // current maximal resolution (needed for refinement heuristic)
    uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());

    statistics.overApproximationBuildTime.start();
    storm::storage::BitVector refinedObservations;
    if (!refine) {
        // If we build the model from scratch, we first have to set up the explorer for the overApproximation.
        if (computeRewards) {
            overApproximation->startNewExploration(storm::utility::zero<ValueType>());
        } else {
            overApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
        }
    } else {
        // If we refine the existing overApproximation, our heuristic also wants to know which states are reachable under an optimal policy
        overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true);
        // We also need to find out which observation resolutions need refinement.
        auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution);
        // If there is a score < 1, we have not reached a fixpoint, yet
        if (std::any_of(obsRatings.begin(), obsRatings.end(), [](ValueType const& value) { return value < storm::utility::one<ValueType>(); })) {
            fixPoint = false;
        }
        refinedObservations = storm::utility::vector::filter<ValueType>(obsRatings, [&heuristicParameters](ValueType const& r) { return r <= heuristicParameters.observationThreshold; });
        STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
        for (auto const& obs : refinedObservations) {
            // Increment the resolution at the refined observations.
            // Detect overflows properly.
            storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber<storm::RationalNumber>(observationResolutionVector[obs]) * storm::utility::convertNumber<storm::RationalNumber>(options.resolutionFactor);
            if (newObsResolutionAsRational > storm::utility::convertNumber<storm::RationalNumber>(std::numeric_limits<uint64_t>::max())) {
                observationResolutionVector[obs] = std::numeric_limits<uint64_t>::max();
            } else {
                observationResolutionVector[obs] = storm::utility::convertNumber<uint64_t>(newObsResolutionAsRational);
            }
        }
        overApproximation->restartExploration();
    }
    statistics.overApproximationMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());

    // Start exploration
    storm::utility::Stopwatch explorationTime;
    if (options.explorationTimeLimit) {
        explorationTime.start();
    }
    bool timeLimitExceeded = false;
    std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
    uint64_t numRewiredOrExploredStates = 0;
    while (overApproximation->hasUnexploredState()) {
        if (!timeLimitExceeded && options.explorationTimeLimit && static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) {
            STORM_LOG_INFO("Exploration time limit exceeded.");
            timeLimitExceeded = true;
            fixPoint = false;
        }

        uint64_t currId = overApproximation->exploreNextState();
        bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior();
        if (!hasOldBehavior) {
            fixPoint = false; // Exploring a new state!
        }
        uint32_t currObservation = beliefManager->getBeliefObservation(currId);
        if (targetObservations.count(currObservation) != 0) {
            overApproximation->setCurrentStateIsTarget();
            overApproximation->addSelfloopTransition();
        } else {
            // We need to decide how to treat this state (and each individual enabled action). There are the following cases:
            // 1 The state has no old behavior and
            //   1.1 we explore all actions or
            //   1.2 we truncate all actions
            // 2 The state has old behavior and was truncated in the last iteration and
            //   2.1 we explore all actions or
            //   2.2 we truncate all actions (essentially restoring old behavior, but we do the truncation step again to benefit from updated bounds)
            // 3 The state has old behavior and was not truncated in the last iteration and the current action
            //   3.1 should be rewired or
            //   3.2 should get the old behavior but either
            //     3.2.1 none of the successor observations has been refined since the last rewiring or exploration of this action or
            //     3.2.2 rewiring is only delayed as it could still have an effect in a later refinement step

            // Find out in which case we are
            bool exploreAllActions = false;
            bool truncateAllActions = false;
            bool restoreAllActions = false;
            bool checkRewireForAllActions = false;
            ValueType gap = storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState());
            if (!hasOldBehavior) {
                // Case 1
                // If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that reaches this state
                if (!timeLimitExceeded && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    exploreAllActions = true; // Case 1.1
                } else {
                    truncateAllActions = true; // Case 1.2
                    overApproximation->setCurrentStateIsTruncated();
                }
            } else {
                if (overApproximation->getCurrentStateWasTruncated()) {
                    // Case 2
                    if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                        exploreAllActions = true; // Case 2.1
                        fixPoint = false;
                    } else {
                        truncateAllActions = true; // Case 2.2
                        overApproximation->setCurrentStateIsTruncated();
                        if (fixPoint) {
                            // Properly check whether this can still be a fixpoint
                            if (overApproximation->currentStateIsOptimalSchedulerReachable() && !storm::utility::isZero(gap)) {
                                fixPoint = false;
                            }
                            //} else {
                            // In this case we truncated a state that is not reachable under optimal schedulers.
                            // If no other state is explored (i.e. fixPoint remains true), these states should still not be reachable in subsequent iterations
                        }
                    }
                } else {
                    // Case 3
                    // The decision for rewiring also depends on the corresponding action, but we have some criteria that lead to case 3.2 (independent of the action)
                    if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                        checkRewireForAllActions = true; // Case 3.1 or Case 3.2
                    } else {
                        restoreAllActions = true; // Definitely Case 3.2
                        // We still need to check for each action whether rewiring makes sense later
                        checkRewireForAllActions = true;
                    }
                }
            }
            bool expandedAtLeastOneAction = false;
            for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                bool expandCurrentAction = exploreAllActions || truncateAllActions;
                if (checkRewireForAllActions) {
                    assert(refine);
                    // In this case, we still need to check whether this action needs to be expanded
                    assert(!expandCurrentAction);
                    // Check the action dependent conditions for rewiring
                    // First, check whether this action has been rewired since the last refinement of one of the successor observations (i.e. whether rewiring would actually change the successor states)
                    assert(overApproximation->currentStateHasOldBehavior());
                    if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) || overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) {
                        fixPoint = false;
                        // Then, check whether the other criteria for rewiring are satisfied
                        if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) {
                            // Do the rewiring now! (Case 3.1)
                            expandCurrentAction = true;
                        } else {
                            // Delay the rewiring (Case 3.2.2)
                            overApproximation->setCurrentChoiceIsDelayed(action);
                        }
                    } // else { Case 3.2.1 }
                }
                if (expandCurrentAction) {
                    expandedAtLeastOneAction = true;
                    if (!truncateAllActions) {
                        // Cases 1.1, 2.1, or 3.1
                        auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                        for (auto const& successor : successorGridPoints) {
                            overApproximation->addTransitionToBelief(action, successor.first, successor.second, false);
                        }
                        if (computeRewards) {
                            overApproximation->computeRewardAtCurrentState(action);
                        }
                    } else {
                        // Cases 1.2 or 2.2
                        ValueType truncationProbability = storm::utility::zero<ValueType>();
                        ValueType truncationValueBound = storm::utility::zero<ValueType>();
                        auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                        for (auto const& successor : successorGridPoints) {
                            bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, true);
                            if (!added) {
                                // We did not explore this successor state. Get a bound on the "missing" value
                                truncationProbability += successor.second;
                                truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first) : overApproximation->computeUpperValueBoundAtBelief(successor.first));
                            }
                        }
                        if (computeRewards) {
                            // The truncationValueBound will be added on top of the reward introduced by the current belief state.
                            overApproximation->addTransitionsToExtraStates(action, truncationProbability);
                            overApproximation->computeRewardAtCurrentState(action, truncationValueBound);
                        } else {
                            overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
                        }
                    }
                } else {
                    // Case 3.2
                    overApproximation->restoreOldBehaviorAtCurrentState(action);
                }
            }
            if (expandedAtLeastOneAction) {
                ++numRewiredOrExploredStates;
            }
        }

        if (storm::utility::resources::isTerminate()) {
            break;
        }
    }

    if (storm::utility::resources::isTerminate()) {
        // don't overwrite statistics of a previous, successful computation
        if (!statistics.overApproximationStates) {
            statistics.overApproximationBuildAborted = true;
            statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates();
        }
        statistics.overApproximationBuildTime.stop();
        return false;
    }

    overApproximation->finishExploration();
    statistics.overApproximationBuildTime.stop();

    statistics.overApproximationCheckTime.start();
    overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
    statistics.overApproximationCheckTime.stop();

    // don't overwrite statistics of a previous, successful computation
    if (!storm::utility::resources::isTerminate() || !statistics.overApproximationStates) {
        statistics.overApproximationStates = overApproximation->getExploredMdp()->getNumberOfStates();
    }
    return fixPoint;
}
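/*!
 * Builds (or, if refine is true, rebuilds) and checks the belief MDP that yields a "conservative" bound on the optimal
 * value (an upper bound when minimizing, a lower bound when maximizing) by unfolding the belief space up to the given
 * size threshold. Truncated beliefs are closed off using the known value bounds, which are based on memoryless POMDP
 * schedulers and therefore keep the bound sound. Returns true if (heuristically) a refinement fixpoint was detected.
 */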
template<typename PomdpModelType, typename BeliefValueType>
bool ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildUnderApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
    statistics.underApproximationBuildTime.start();
    bool fixPoint = true;
    if (heuristicParameters.sizeThreshold != std::numeric_limits<uint64_t>::max()) {
        statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold;
    }
    if (!refine) {
        // Build a new under approximation
        if (computeRewards) {
            underApproximation->startNewExploration(storm::utility::zero<ValueType>());
        } else {
            underApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
        }
    } else {
        // Restart the building process
        underApproximation->restartExploration();
    }

    // Expand the beliefs
    storm::utility::Stopwatch explorationTime;
    if (options.explorationTimeLimit) {
        explorationTime.start();
    }
    bool timeLimitExceeded = false;
    while (underApproximation->hasUnexploredState()) {
        if (!timeLimitExceeded && options.explorationTimeLimit && static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) {
            STORM_LOG_INFO("Exploration time limit exceeded.");
            timeLimitExceeded = true;
        }
        uint64_t currId = underApproximation->exploreNextState();
        uint32_t currObservation = beliefManager->getBeliefObservation(currId);
        bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated();
        if (!stateAlreadyExplored || timeLimitExceeded) {
            fixPoint = false;
        }
        if (targetObservations.count(currObservation) != 0) {
            underApproximation->setCurrentStateIsTarget();
            underApproximation->addSelfloopTransition();
        } else {
            bool stopExploration = false;
            if (timeLimitExceeded) {
                stopExploration = true;
                underApproximation->setCurrentStateIsTruncated();
            } else if (!stateAlreadyExplored) {
                // Check whether we want to explore the state now!
                if (storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < heuristicParameters.gapThreshold) {
                    stopExploration = true;
                    underApproximation->setCurrentStateIsTruncated();
                } else if (underApproximation->getCurrentNumberOfMdpStates() >= heuristicParameters.sizeThreshold) {
                    stopExploration = true;
                    underApproximation->setCurrentStateIsTruncated();
                }
            }
            for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                // Always restore old behavior if available
                if (stateAlreadyExplored) {
                    underApproximation->restoreOldBehaviorAtCurrentState(action);
                } else {
                    ValueType truncationProbability = storm::utility::zero<ValueType>();
                    ValueType truncationValueBound = storm::utility::zero<ValueType>();
                    auto successors = beliefManager->expand(currId, action);
                    for (auto const& successor : successors) {
                        bool added = underApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration);
                        if (!added) {
                            STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
                            // We did not explore this successor state. Get a bound on the "missing" value.
                            truncationProbability += successor.second;
                            // Some care has to be taken here: Essentially, we are triangulating a value for the under-approximation out of other
                            // under-approximation values. In general, this does not yield a sound under-approximation anymore.
                            // However, in our case it remains sound as the under-approximation values are based on a memoryless scheduler.
                            truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first));
                        }
                    }
                    if (stopExploration) {
                        if (computeRewards) {
                            underApproximation->addTransitionsToExtraStates(action, truncationProbability);
                        } else {
                            underApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
                        }
                    }
                    if (computeRewards) {
                        // The truncationValueBound will be added on top of the reward introduced by the current belief state.
                        underApproximation->computeRewardAtCurrentState(action, truncationValueBound);
                    }
                }
            }
        }
        if (storm::utility::resources::isTerminate()) {
            break;
        }
    }

    if (storm::utility::resources::isTerminate()) {
        // don't overwrite statistics of a previous, successful computation
        if (!statistics.underApproximationStates) {
            statistics.underApproximationBuildAborted = true;
            statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates();
        }
        statistics.underApproximationBuildTime.stop();
        return false;
    }

    underApproximation->finishExploration();
    statistics.underApproximationBuildTime.stop();

    statistics.underApproximationCheckTime.start();
    underApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
    statistics.underApproximationCheckTime.stop();

    // don't overwrite statistics of a previous, successful computation
    if (!storm::utility::resources::isTerminate() || !statistics.underApproximationStates) {
        statistics.underApproximationStates = underApproximation->getExploredMdp()->getNumberOfStates();
    }
    return fixPoint;
}

template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>;
template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;

} // namespace modelchecker
} // namespace pomdp
} // namespace storm