From 55c4408c6a783fac39ec191279ed49997e4ba74b Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 11 May 2020 10:01:18 +0200 Subject: [PATCH] Storing the observation resolutions as a float so that we can increase the resolution more accurately with a non-integer factor --- .../ApproximatePOMDPModelchecker.cpp | 41 +++++++++---------- .../ApproximatePOMDPModelchecker.h | 7 ++-- src/storm-pomdp/storage/BeliefManager.h | 33 +++++++-------- 3 files changed, 40 insertions(+), 41 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index b08eb8177..b43e62711 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -191,7 +191,7 @@ namespace storm { void ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds const& pomdpValueBounds, Result& result) { if (options.discretize) { - std::vector observationResolutionVector(pomdp.getNrObservations(), options.resolutionInit); + std::vector observationResolutionVector(pomdp.getNrObservations(), storm::utility::convertNumber(options.resolutionInit)); auto manager = std::make_shared(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); if (rewardModelName) { manager->setRewardModel(rewardModelName); @@ -244,12 +244,12 @@ namespace storm { statistics.refinementSteps = 0; // Set up exploration data - std::vector observationResolutionVector; + std::vector observationResolutionVector; std::shared_ptr overApproxBeliefManager; std::shared_ptr overApproximation; HeuristicParameters overApproxHeuristicPar; if (options.discretize) { // Setup and build first OverApproximation - observationResolutionVector = std::vector(pomdp.getNrObservations(), options.resolutionInit); + observationResolutionVector = std::vector(pomdp.getNrObservations(), storm::utility::convertNumber(options.resolutionInit)); overApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); if (rewardModelName) { overApproxBeliefManager->setRewardModel(rewardModelName); @@ -404,32 +404,33 @@ namespace storm { * Here, 0 means a bad approximation and 1 means a good approximation. */ template - typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) { - auto n = storm::utility::convertNumber(info.support.size()); - auto one = storm::utility::one(); + BeliefValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution) { + auto n = storm::utility::convertNumber(info.support.size()); + auto one = storm::utility::one(); if (storm::utility::isOne(n)) { // If the belief is Dirac, it has to be approximated precisely. // In this case, we return the best possible rating return one; } else { // Create the rating for this observation at this choice from the given info - ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; + BeliefValueType obsChoiceRating = storm::utility::convertNumber(info.maxProbabilityToSuccessorWithObs / info.observationProbability); // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 // Normalize the rating so that it ranges from 0 to 1, where // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. obsChoiceRating = (obsChoiceRating * n - one) / (n - one); // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined) - obsChoiceRating *= storm::utility::convertNumber(observationResolution) / storm::utility::convertNumber(maxResolution); + obsChoiceRating *= observationResolution / maxResolution; return obsChoiceRating; } } template - std::vector::ValueType> ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector, uint64_t const& maxResolution) { + std::vector ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector) { uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates(); auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices(); + BeliefValueType maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); - std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); + std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); std::map gatheredSuccessorObservations; // Declare here to avoid reallocations for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) { @@ -444,7 +445,7 @@ namespace storm { overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); for (auto const& obsInfo : gatheredSuccessorObservations) { auto const& obs = obsInfo.first; - ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution); + BeliefValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution); // The rating of the observation will be the minimum over all choice-based observation ratings resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating); @@ -476,14 +477,11 @@ namespace storm { } template - bool ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + bool ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { // Detect whether the refinement reached a fixpoint. bool fixPoint = true; - // current maximal resolution (needed for refinement heuristic) - uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); - statistics.overApproximationBuildTime.start(); storm::storage::BitVector refinedObservations; if (!refine) { @@ -497,7 +495,8 @@ namespace storm { // If we refine the existing overApproximation, our heuristic also wants to know which states are reachable under an optimal policy overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true); // We also need to find out which observation resolutions needs refinement. - auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution); + // current maximal resolution (needed for refinement heuristic) + auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector); // If there is a score < 1, we have not reached a fixpoint, yet if (std::any_of(obsRatings.begin(), obsRatings.end(), [](ValueType const& value){return value < storm::utility::one();})) { fixPoint = false; @@ -506,17 +505,17 @@ namespace storm { STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); for (auto const& obs : refinedObservations) { // Increment the resolution at the refined observations. - // Detect overflows properly. + // Use storm's rational number to detect overflows properly. storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber(observationResolutionVector[obs]) * storm::utility::convertNumber(options.resolutionFactor); - if (newObsResolutionAsRational > storm::utility::convertNumber(std::numeric_limits::max())) { - observationResolutionVector[obs] = std::numeric_limits::max(); + if (newObsResolutionAsRational > storm::utility::convertNumber(std::numeric_limits::max())) { + observationResolutionVector[obs] = storm::utility::convertNumber(std::numeric_limits::max()); } else { - observationResolutionVector[obs] = storm::utility::convertNumber(newObsResolutionAsRational); + observationResolutionVector[obs] = storm::utility::convertNumber(newObsResolutionAsRational); } } overApproximation->restartExploration(); } - statistics.overApproximationMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); + statistics.overApproximationMaxResolution = storm::utility::convertNumber(storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()))); // Start exploration storm::utility::Stopwatch explorationTime; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index c9f46aa1f..0e1b3a715 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -49,7 +49,6 @@ namespace storm { * * @param targetObservations set of target observations * @param min true if minimum value is to be computed - * @param observationResolutionVector vector containing the resolution to be used for each observation * @param computeRewards true if rewards are to be computed, false if probability is computed * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value @@ -79,7 +78,7 @@ namespace storm { * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties * Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp) */ - bool buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); + bool buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); /** * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties @@ -87,9 +86,9 @@ namespace storm { */ bool buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); - ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution); + BeliefValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution); - std::vector getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector, uint64_t const& maxResolution); + std::vector getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector); struct Statistics { Statistics(); diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 596d67bd5..4f9ca445c 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -111,7 +111,7 @@ namespace storm { return pomdp.getNumberOfChoices(belief.begin()->first); } - Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) { + Triangulation triangulateBelief(BeliefId beliefId, BeliefValueType resolution) { return triangulateBelief(getBelief(beliefId), resolution); } @@ -134,7 +134,7 @@ namespace storm { return beliefs.size(); } - std::vector> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { + std::vector> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { return expandInternal(beliefId, actionIndex, observationResolutions); } @@ -293,10 +293,10 @@ namespace storm { } }; - void triangulateBeliefFreudenthal(BeliefType const& belief, uint64_t const& resolution, Triangulation& result) { + void triangulateBeliefFreudenthal(BeliefType const& belief, BeliefValueType const& resolution, Triangulation& result) { STORM_LOG_ASSERT(resolution != 0, "Invalid resolution: 0"); + STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution"); StateType numEntries = belief.size(); - auto convResolution = storm::utility::convertNumber(resolution); // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) // Probabilities will be triangulated to values in 0/N, 1/N, 2/N, ..., N/N // Variable names are mostly based on the paper @@ -307,12 +307,12 @@ namespace storm { qsRow.reserve(numEntries); std::vector toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices toOriginalIndicesMap.reserve(numEntries); - BeliefValueType x = convResolution; + BeliefValueType x = resolution; for (auto const& entry : belief) { qsRow.push_back(storm::utility::floor(x)); // v sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v toOriginalIndicesMap.push_back(entry.first); - x -= entry.second * convResolution; + x -= entry.second * resolution; } // Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler qsRow.push_back(storm::utility::zero()); @@ -339,7 +339,7 @@ namespace storm { for (StateType j = 0; j < numEntries; ++j) { BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; if (!cc.isZero(gridPointEntry)) { - gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / convResolution; + gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / resolution; } } result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); @@ -348,17 +348,17 @@ namespace storm { } } - void triangulateBeliefDynamic(BeliefType const& belief, uint64_t const& resolution, Triangulation& result) { + void triangulateBeliefDynamic(BeliefType const& belief, BeliefValueType const& resolution, Triangulation& result) { // Find the best resolution for this belief, i.e., N such that the largest distance between one of the belief values to a value in {i/N | 0 ≤ i ≤ N} is minimal - uint64_t finalResolution = resolution; + STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution"); + BeliefValueType finalResolution = resolution; uint64_t finalResolutionMisses = belief.size() + 1; // We don't need to check resolutions that are smaller than the maximal resolution divided by 2 (as we already checked multiples of these) - for (uint64_t currResolution = resolution; currResolution > resolution / 2; --currResolution) { + for (BeliefValueType currResolution = resolution; currResolution > resolution / 2; --currResolution) { uint64_t currResMisses = 0; - BeliefValueType currResolutionConverted = storm::utility::convertNumber(currResolution); bool continueWithNextResolution = false; for (auto const& belEntry : belief) { - BeliefValueType product = belEntry.second * currResolutionConverted; + BeliefValueType product = belEntry.second * currResolution; if (!cc.isZero(product - storm::utility::round(product))) { ++currResMisses; if (currResMisses >= finalResolutionMisses) { @@ -384,7 +384,7 @@ namespace storm { triangulateBeliefFreudenthal(belief, finalResolution, result); } - Triangulation triangulateBelief(BeliefType const& belief, uint64_t const& resolution) { + Triangulation triangulateBelief(BeliefType const& belief, BeliefValueType const& resolution) { STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); Triangulation result; // Quickly triangulate Dirac beliefs @@ -392,12 +392,13 @@ namespace storm { result.weights.push_back(storm::utility::one()); result.gridPoints.push_back(getOrAddBeliefId(belief)); } else { + auto ceiledResolution = storm::utility::ceil(resolution); switch (triangulationMode) { case TriangulationMode::Static: - triangulateBeliefFreudenthal(belief, resolution, result); + triangulateBeliefFreudenthal(belief, ceiledResolution, result); break; case TriangulationMode::Dynamic: - triangulateBeliefDynamic(belief, resolution, result); + triangulateBeliefDynamic(belief, ceiledResolution, result); break; default: STORM_LOG_ASSERT(false, "Invalid triangulation mode."); @@ -407,7 +408,7 @@ namespace storm { return result; } - std::vector> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { + std::vector> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { std::vector> destinations; BeliefType belief = getBelief(beliefId);