#include "ApproximatePOMDPModelchecker.h" #include <tuple> #include <boost/algorithm/string.hpp> #include "storm-pomdp/analysis/FormulaInformation.h" #include "storm/utility/ConstantsComparator.h" #include "storm/utility/NumberTraits.h" #include "storm/utility/graph.h" #include "storm/logic/Formulas.h" #include "storm/models/sparse/Dtmc.h" #include "storm/models/sparse/StandardRewardModel.h" #include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h" #include "storm/utility/vector.h" #include "storm/api/properties.h" #include "storm/api/export.h" #include "storm-pomdp/builder/BeliefMdpExplorer.h" #include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h" #include "storm/utility/macros.h" #include "storm/utility/SignalHandler.h" #include "storm/exceptions/NotSupportedException.h" namespace storm { namespace pomdp { namespace modelchecker { template<typename PomdpModelType, typename BeliefValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Options::Options() { initialGridResolution = 10; explorationThreshold = storm::utility::zero<ValueType>(); doRefinement = true; refinementPrecision = storm::utility::convertNumber<ValueType>(1e-4); numericPrecision = storm::NumberTraits<ValueType>::IsExact ? storm::utility::zero<ValueType>() : storm::utility::convertNumber<ValueType>(1e-9); cacheSubsimplices = false; } template<typename PomdpModelType, typename BeliefValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) { // Intentionally left empty } template<typename PomdpModelType, typename BeliefValueType> typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::diff(bool relative) const { ValueType diff = upperBound - lowerBound; if (diff < storm::utility::zero<ValueType>()) { STORM_LOG_WARN_COND(diff >= 1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << "."); diff = storm::utility::zero<ValueType >(); } if (relative && !storm::utility::isZero(upperBound)) { diff /= upperBound; } return diff; } template<typename PomdpModelType, typename BeliefValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { // intentionally left empty; } template<typename PomdpModelType, typename BeliefValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) { cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(this->options.numericPrecision), false); } template<typename PomdpModelType, typename BeliefValueType> typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::check(storm::logic::Formula const& formula) { // Reset all collected statistics statistics = Statistics(); // Extract the relevant information from the formula auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); // Compute some initial bounds on the values for each state of the pomdp auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo); Result 
            template<typename PomdpModelType, typename BeliefValueType>
            typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result
            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::check(storm::logic::Formula const& formula) {
                // Reset all collected statistics
                statistics = Statistics();
                // Extract the relevant information from the formula
                auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula);
                // Compute some initial bounds on the values for each state of the pomdp
                auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
                Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]);
                boost::optional<std::string> rewardModelName;
                if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) {
                    // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
                    STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported.");
                    if (formulaInfo.isNonNestedReachabilityProbability()) {
                        if (!formulaInfo.getSinkStates().empty()) {
                            auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
                            reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
                            STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported.");
                        }
                    } else {
                        // Expected reward formula!
                        rewardModelName = formulaInfo.getRewardModelName();
                    }
                } else {
                    STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
                }

                if (options.doRefinement) {
                    refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
                } else {
                    computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
                }
                if (storm::utility::resources::isTerminate()) {
                    statistics.aborted = true;
                }
                return result;
            }
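            /*!
             * Prints the statistics collected during the most recent call to check() (model sizes, grid resolutions,
             * and build/check times for both approximations) to the given stream.
             */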
            template<typename PomdpModelType, typename BeliefValueType>
            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::printStatisticsToStream(std::ostream& stream) const {
                stream << "##### Grid Approximation Statistics ######" << std::endl;
                stream << "# Input model: " << std::endl;
                pomdp.printModelInformationToStream(stream);
                stream << "# Max. Number of states with same observation: " << pomdp.getMaxNrStatesWithSameObservation() << std::endl;
                if (statistics.aborted) {
                    stream << "# Computation aborted early" << std::endl;
                }
                // Refinement information:
                if (statistics.refinementSteps) {
                    stream << "# Number of refinement steps: " << statistics.refinementSteps.get() << std::endl;
                }
                // The over-approximation MDP:
                if (statistics.overApproximationStates) {
                    stream << "# Number of states in the ";
                    if (options.doRefinement) {
                        stream << "final ";
                    }
                    stream << "grid MDP for the over-approximation: ";
                    if (statistics.overApproximationBuildAborted) {
                        stream << ">=";
                    }
                    stream << statistics.overApproximationStates.get() << std::endl;
                    stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.get() << std::endl;
                    stream << "# Time spent building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << std::endl;
                    stream << "# Time spent checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << std::endl;
                }
                // The under-approximation MDP:
                if (statistics.underApproximationStates) {
                    stream << "# Number of states in the ";
                    if (options.doRefinement) {
                        stream << "final ";
                    }
                    stream << "grid MDP for the under-approximation: ";
                    if (statistics.underApproximationBuildAborted) {
                        stream << ">=";
                    }
                    stream << statistics.underApproximationStates.get() << std::endl;
                    stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.get() << std::endl;
                    stream << "# Time spent building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << std::endl;
                    stream << "# Time spent checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << std::endl;
                }
                stream << "##########################################" << std::endl;
            }
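            /*!
             * Computes bounds on the reachability probability (or expected reward) in a single pass ("on the fly"):
             * first an over-approximation obtained by triangulating beliefs over a fixed grid, then an
             * under-approximation whose exploration is truncated once it reaches the size of the over-approximation.
             */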
            template<typename PomdpModelType, typename BeliefValueType>
            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const& targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
                if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
                    STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
                }
                uint64_t underApproxSizeThreshold = 0;
                {
                    // Over-approximation
                    std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
                    auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
                    if (rewardModelName) {
                        manager->setRewardModel(rewardModelName);
                    }
                    auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
                    buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, manager, approx);
                    if (approx->hasComputedValues()) {
                        STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n");
                        approx->getExploredMdp()->printModelInformationToStream(std::cout);
                        ValueType& resultValue = min ? result.lowerBound : result.upperBound;
                        resultValue = approx->getComputedValueAtInitialState();
                        underApproxSizeThreshold = approx->getExploredMdp()->getNumberOfStates();
                    }
                }
                {
                    // Under-approximation (uses a fresh belief manager)
                    auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
                    if (rewardModelName) {
                        manager->setRewardModel(rewardModelName);
                    }
                    auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
                    buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx);
                    if (approx->hasComputedValues()) {
                        STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n");
                        approx->getExploredMdp()->printModelInformationToStream(std::cout);
                        ValueType& resultValue = min ? result.upperBound : result.lowerBound;
                        resultValue = approx->getComputedValueAtInitialState();
                    }
                }
            }
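            /*!
             * Alternates between refining the over-approximation (by increasing observation resolutions) and the
             * under-approximation (by raising its exploration state limit) until the gap between the two bounds
             * drops below options.refinementPrecision, the computation is aborted, or one of the approximations
             * fails to produce values.
             */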
            template<typename PomdpModelType, typename BeliefValueType>
            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const& targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
                // Set up exploration data
                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
                auto overApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
                auto underApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
                if (rewardModelName) {
                    overApproxBeliefManager->setRewardModel(rewardModelName);
                    underApproxBeliefManager->setRewardModel(rewardModelName);
                }

                // Over-approximation
                auto overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
                buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, overApproxBeliefManager, overApproximation);
                if (!overApproximation->hasComputedValues()) {
                    return;
                }
                ValueType& overApproxValue = min ? result.lowerBound : result.upperBound;
                overApproxValue = overApproximation->getComputedValueAtInitialState();

                // Under-approximation
                uint64_t underApproxSizeThreshold = std::max<uint64_t>(overApproximation->getExploredMdp()->getNumberOfStates(), 10);
                auto underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
                buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
                if (!underApproximation->hasComputedValues()) {
                    return;
                }
                ValueType& underApproxValue = min ? result.upperBound : result.lowerBound;
                underApproxValue = underApproximation->getComputedValueAtInitialState();

                // ValueType lastMinScore = storm::utility::infinity<ValueType>();
                // Start refinement
                statistics.refinementSteps = 0;
                ValueType refinementAggressiveness = storm::utility::convertNumber<ValueType>(0.0);
                while (result.diff() > options.refinementPrecision) {
                    if (storm::utility::resources::isTerminate()) {
                        break;
                    }
                    ++statistics.refinementSteps.get();
                    STORM_LOG_INFO("Starting refinement step " << statistics.refinementSteps.get() << ". Current difference between lower and upper bound is " << result.diff() << ".");

                    // Refine over-approximation
                    STORM_LOG_DEBUG("Refining over-approximation with aggressiveness " << refinementAggressiveness << ".");
                    buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, &refinementAggressiveness, observationResolutionVector, overApproxBeliefManager, overApproximation);
                    if (overApproximation->hasComputedValues()) {
                        overApproxValue = overApproximation->getComputedValueAtInitialState();
                    } else {
                        break;
                    }

                    if (result.diff() > options.refinementPrecision) {
                        // Refine under-approximation
                        underApproxSizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(storm::utility::convertNumber<ValueType>(underApproxSizeThreshold) * (storm::utility::one<ValueType>() + refinementAggressiveness));
                        underApproxSizeThreshold = std::max<uint64_t>(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates());
                        STORM_LOG_DEBUG("Refining under-approximation with size threshold " << underApproxSizeThreshold << ".");
                        buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
                        if (underApproximation->hasComputedValues()) {
                            underApproxValue = underApproximation->getComputedValueAtInitialState();
                        } else {
                            break;
                        }
                    }
                }
            }
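            // Worked example for the normalization performed in rateObservation below: if a successor belief is
            // triangulated into n = 4 grid points and the largest triangulation weight is 1 (the belief coincides
            // with a grid point), the rating is (1 * 4 - 1) / (4 - 1) = 1; if the largest weight is 1/4 (the belief
            // lies in the middle of the simplex), the rating is (1/4 * 4 - 1) / (4 - 1) = 0.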
            /*!
             * Heuristically rates the quality of the approximation described by the given successor observation info.
             * Here, 0 means a bad approximation and 1 means a good approximation.
             */
            template<typename PomdpModelType, typename BeliefValueType>
            typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType
            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) {
                auto n = storm::utility::convertNumber<ValueType>(info.successorWithObsCount);
                auto one = storm::utility::one<ValueType>();

                // Create the rating for this observation at this choice from the given info
                ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
                // At this point, obsChoiceRating is the largest triangulation weight (which ranges from 1/n to 1).
                // Normalize the rating so that it ranges from 0 to 1, where
                // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
                obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
                // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
                obsChoiceRating *= storm::utility::convertNumber<ValueType>(observationResolution) / storm::utility::convertNumber<ValueType>(maxResolution);
                return obsChoiceRating;
            }

            template<typename PomdpModelType, typename BeliefValueType>
            std::vector<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType>
            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<uint64_t> const& observationResolutionVector, uint64_t const& maxResolution) {
                uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices();
                std::vector<ValueType> resultingRatings(pomdp.getNrObservations(), storm::utility::one<ValueType>());

                std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
                for (uint64_t mdpChoice = 0; mdpChoice < numMdpChoices; ++mdpChoice) {
                    gatheredSuccessorObservations.clear();
                    overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
                    for (auto const& obsInfo : gatheredSuccessorObservations) {
                        auto const& obs = obsInfo.first;
                        ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
                        // The rating of the observation will be the minimum over all choice-based observation ratings
                        resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
                    }
                }
                return resultingRatings;
            }
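            /*!
             * Builds (or, if refine is true, rebuilds) the grid MDP that over-approximates the belief MDP.
             * Each reachable belief is triangulated over a grid whose per-observation resolution is given by
             * observationResolutionVector; when refining, the resolutions of poorly rated observations are
             * increased first, steered by the given refinementAggressiveness.
             */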
            template<typename PomdpModelType, typename BeliefValueType>
            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
                STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given.");
                STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero<ValueType>(), "Can not refine with negative aggressiveness.");
                STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one<ValueType>(), "Refinement with aggressiveness > 1 is invalid.");

                // Current maximal resolution (needed for refinement heuristic)
                uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());

                statistics.overApproximationBuildTime.start();
                storm::storage::BitVector refinedObservations;
                if (!refine) {
                    // If we build the model from scratch, we first have to set up the explorer for the over-approximation.
                    if (computeRewards) {
                        overApproximation->startNewExploration(storm::utility::zero<ValueType>());
                    } else {
                        overApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
                    }
                } else {
                    // If we refine the existing over-approximation, we need to find out which observation resolutions need refinement.
                    auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution);
                    ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end());
                    // Potentially increase the aggressiveness so that at least one observation actually gets refined.
                    *refinementAggressiveness = std::max(minRating, *refinementAggressiveness);
                    refinedObservations = storm::utility::vector::filter<ValueType>(obsRatings, [&refinementAggressiveness](ValueType const& r) { return r <= *refinementAggressiveness; });
                    STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
                    for (auto const& obs : refinedObservations) {
                        // Heuristically increment the resolution at the refined observations (also based on the refinementAggressiveness)
                        ValueType incrementValue = storm::utility::one<ValueType>() + (*refinementAggressiveness) * storm::utility::convertNumber<ValueType>(observationResolutionVector[obs]);
                        observationResolutionVector[obs] += storm::utility::convertNumber<uint64_t>(storm::utility::ceil(incrementValue));
                    }
                    overApproximation->restartExploration();
                }
                statistics.overApproximationMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
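
                // The loop below explores the belief MDP breadth-first: target beliefs become absorbing, and every
                // other belief is either truncated (if the gap between its value bounds is below the exploration
                // threshold) or expanded by triangulating each successor belief over the current grid.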
                // Start exploration
                std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
                while (overApproximation->hasUnexploredState()) {
                    uint64_t currId = overApproximation->exploreNextState();
                    uint32_t currObservation = beliefManager->getBeliefObservation(currId);
                    if (targetObservations.count(currObservation) != 0) {
                        overApproximation->setCurrentStateIsTarget();
                        overApproximation->addSelfloopTransition();
                    } else {
                        bool stopExploration = false;
                        if (storm::utility::abs<ValueType>(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
                            stopExploration = true;
                            overApproximation->setCurrentStateIsTruncated();
                        }
                        for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                            // Check whether we expand this state/action pair.
                            // We always expand if we are not doing refinement or if the state was not available in the "old" MDP.
                            // Otherwise, a heuristic decides.
                            bool expandStateAction = true;
                            if (refine && overApproximation->currentStateHasOldBehavior()) {
                                // Compute a rating of the current state/action pair
                                ValueType stateActionRating = storm::utility::one<ValueType>();
                                gatheredSuccessorObservations.clear();
                                overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations);
                                for (auto const& obsInfo : gatheredSuccessorObservations) {
                                    if (refinedObservations.get(obsInfo.first)) {
                                        ValueType obsRating = rateObservation(obsInfo.second, observationResolutionVector[obsInfo.first], oldMaxResolution);
                                        stateActionRating = std::min(stateActionRating, obsRating);
                                    }
                                }
                                // Only expand (i.e. refine) this state/action pair if its rating is below the doubled refinementAggressiveness
                                expandStateAction = stateActionRating < storm::utility::convertNumber<ValueType>(2.0) * (*refinementAggressiveness);
                            }
                            if (expandStateAction) {
                                ValueType truncationProbability = storm::utility::zero<ValueType>();
                                ValueType truncationValueBound = storm::utility::zero<ValueType>();
                                auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                                for (auto const& successor : successorGridPoints) {
                                    bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration);
                                    if (!added) {
                                        STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
                                        // We did not explore this successor state. Get a bound on the "missing" value.
                                        truncationProbability += successor.second;
                                        truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first) : overApproximation->computeUpperValueBoundAtBelief(successor.first));
                                    }
                                }
                                if (stopExploration) {
                                    if (computeRewards) {
                                        overApproximation->addTransitionsToExtraStates(action, truncationProbability);
                                    } else {
                                        overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
                                    }
                                }
                                if (computeRewards) {
                                    // The truncationValueBound will be added on top of the reward introduced by the current belief state.
                                    overApproximation->computeRewardAtCurrentState(action, truncationValueBound);
                                }
                            } else {
                                // Do not refine here
                                overApproximation->restoreOldBehaviorAtCurrentState(action);
                            }
                        }
                    }
                    if (storm::utility::resources::isTerminate()) {
                        statistics.overApproximationBuildAborted = true;
                        break;
                    }
                }
                // TODO: Drop unreachable states (sometimes?)
                statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates();
                if (storm::utility::resources::isTerminate()) {
                    statistics.overApproximationBuildTime.stop();
                    return;
                }
                overApproximation->finishExploration();
                statistics.overApproximationBuildTime.stop();
                STORM_LOG_DEBUG("Explored " << statistics.overApproximationStates.get() << " states.");

                statistics.overApproximationCheckTime.start();
                overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                statistics.overApproximationCheckTime.stop();
            }
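            /*!
             * Builds (or rebuilds) the MDP that under-approximates the belief MDP. Beliefs are expanded exactly
             * (without triangulation) until maxStateCount states have been explored; beyond that point exploration
             * is truncated, using the known value bounds to account for the cut-off successors.
             */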
            template<typename PomdpModelType, typename BeliefValueType>
            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildUnderApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
                statistics.underApproximationBuildTime.start();
                statistics.underApproximationStateLimit = maxStateCount;
                if (!underApproximation->hasComputedValues()) {
                    // Build a new under-approximation
                    if (computeRewards) {
                        underApproximation->startNewExploration(storm::utility::zero<ValueType>());
                    } else {
                        underApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
                    }
                } else {
                    // Restart the building process
                    underApproximation->restartExploration();
                }

                // Expand the beliefs
                while (underApproximation->hasUnexploredState()) {
                    uint64_t currId = underApproximation->exploreNextState();
                    uint32_t currObservation = beliefManager->getBeliefObservation(currId);
                    if (targetObservations.count(currObservation) != 0) {
                        underApproximation->setCurrentStateIsTarget();
                        underApproximation->addSelfloopTransition();
                    } else {
                        bool stopExploration = false;
                        if (!underApproximation->currentStateHasOldBehavior()) {
                            if (storm::utility::abs<ValueType>(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
                                stopExploration = true;
                                underApproximation->setCurrentStateIsTruncated();
                            } else if (underApproximation->getCurrentNumberOfMdpStates() >= maxStateCount) {
                                stopExploration = true;
                                underApproximation->setCurrentStateIsTruncated();
                            }
                        }
                        for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                            // Always restore old behavior if available
                            if (underApproximation->currentStateHasOldBehavior()) {
                                underApproximation->restoreOldBehaviorAtCurrentState(action);
                            } else {
                                ValueType truncationProbability = storm::utility::zero<ValueType>();
                                ValueType truncationValueBound = storm::utility::zero<ValueType>();
                                auto successors = beliefManager->expand(currId, action);
                                for (auto const& successor : successors) {
                                    bool added = underApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration);
                                    if (!added) {
                                        STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
                                        // We did not explore this successor state. Get a bound on the "missing" value.
                                        truncationProbability += successor.second;
                                        truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first));
                                    }
                                }
                                if (stopExploration) {
                                    if (computeRewards) {
                                        underApproximation->addTransitionsToExtraStates(action, truncationProbability);
                                    } else {
                                        underApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
                                    }
                                }
                                if (computeRewards) {
                                    // The truncationValueBound will be added on top of the reward introduced by the current belief state.
                                    underApproximation->computeRewardAtCurrentState(action, truncationValueBound);
                                }
                            }
                        }
                    }
                    if (storm::utility::resources::isTerminate()) {
                        statistics.underApproximationBuildAborted = true;
                        break;
                    }
                }
                statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates();
                if (storm::utility::resources::isTerminate()) {
                    statistics.underApproximationBuildTime.stop();
                    return;
                }
                underApproximation->finishExploration();
                statistics.underApproximationBuildTime.stop();

                statistics.underApproximationCheckTime.start();
                underApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                statistics.underApproximationCheckTime.stop();
            }

            template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>;
            template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;
        }
    }
}