From 4ea452854fbce16a2316d82c0b310bedd7b339fb Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 22 Apr 2020 08:58:19 +0200 Subject: [PATCH] Fixes for scoring observations --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 17 +++++++---- .../ApproximatePOMDPModelchecker.cpp | 30 ++++++++++++------- src/storm-pomdp/storage/BeliefManager.h | 11 ++++++- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index e5d233aec..5fb5bfe26 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -456,10 +456,14 @@ namespace storm { return upperValueBounds[getCurrentMdpState()]; } + /// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for minimizing properties) + /// Or that the pomdpLowerValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid. ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const { return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds); } - + + /// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for maximizing properties) + /// Or that the pomdpUpperValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid. ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const { return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds); } @@ -507,7 +511,7 @@ namespace storm { // Intentionally left empty. } - void join(SuccessorObservationInformation other) { + void join(SuccessorObservationInformation other) { /// Does not join support (for performance reasons) observationProbability += other.observationProbability; maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs); successorWithObsCount += other.successorWithObsCount; @@ -515,17 +519,19 @@ namespace storm { ValueType observationProbability; /// The probability we move to the corresponding observation. ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation. - uint64_t successorWithObsCount; /// The number of successors with this observation + uint64_t successorWithObsCount; /// The number of successor beliefstates with this observation + typename BeliefManagerType::BeliefSupportType support; }; - void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map gatheredSuccessorObservations) { + void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map& gatheredSuccessorObservations) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + } - void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map gatheredSuccessorObservations) { + void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map& gatheredSuccessorObservations) { STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before"); for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { auto const& beliefId = getBeliefId(entry.getColumn()); @@ -537,6 +543,7 @@ namespace storm { // There already is an entry for this observation, so join the two informations obsInsertion.first->second.join(info); } + beliefManager->joinSupport(beliefId, obsInsertion.first->second.support); } } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 622e63512..b35ded61c 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -283,18 +283,23 @@ namespace storm { */ template typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) { - auto n = storm::utility::convertNumber(info.successorWithObsCount); + auto n = storm::utility::convertNumber(info.support.size()); auto one = storm::utility::one(); - - // Create the rating for this observation at this choice from the given info - ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; - // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 - // Normalize the rating so that it ranges from 0 to 1, where - // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. - obsChoiceRating = (obsChoiceRating * n - one) / (n - one); - // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined) - obsChoiceRating *= storm::utility::convertNumber(observationResolution) / storm::utility::convertNumber(maxResolution); - return obsChoiceRating; + if (storm::utility::isOne(n)) { + // If the belief is Dirac, it has to be approximated precisely. + // In this case, we return the best possible rating + return one; + } else { + // Create the rating for this observation at this choice from the given info + ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; + // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 + // Normalize the rating so that it ranges from 0 to 1, where + // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. + obsChoiceRating = (obsChoiceRating * n - one) / (n - one); + // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined) + obsChoiceRating *= storm::utility::convertNumber(observationResolution) / storm::utility::convertNumber(maxResolution); + return obsChoiceRating; + } } template @@ -487,6 +492,9 @@ namespace storm { STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); // We did not explore this successor state. Get a bound on the "missing" value truncationProbability += successor.second; + // Some care has to be taken here: Essentially, we are triangulating a value for the under-approximation out of other + // under-approximation values. In general, this does not yield a sound underapproximation anymore. + // However, in our case this is still the case as the under-approximation values are based on a memoryless scheduler. truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first)); } } diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 25ec3a3d2..b01ad5358 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "storm/adapters/RationalNumberAdapter.h" #include "storm/utility/macros.h" #include "storm/exceptions/UnexpectedException.h" @@ -16,6 +17,7 @@ namespace storm { typedef typename PomdpType::ValueType ValueType; typedef boost::container::flat_map BeliefType; // iterating over this shall be ordered (for correct hash computation) + typedef boost::container::flat_set BeliefSupportType; typedef uint64_t BeliefId; BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { @@ -99,7 +101,7 @@ namespace storm { } uint64_t getBeliefNumberOfChoices(BeliefId beliefId) { - auto belief = getBelief(beliefId); + auto const& belief = getBelief(beliefId); return pomdp.getNumberOfChoices(belief.begin()->first); } @@ -115,6 +117,13 @@ namespace storm { } } + void joinSupport(BeliefId const& beliefId, BeliefSupportType& support) { + auto const& belief = getBelief(beliefId); + for (auto const& entry : belief) { + support.insert(entry.first); + } + } + BeliefId getNumberOfBeliefIds() const { return beliefs.size(); }