Fixes for scoring observations

5 years ago · 4ea452854f
3 changed files with 41 additions and 17 deletions
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@ -456,10 +456,14 @@ namespace storm {
                return upperValueBounds[getCurrentMdpState()];
            }

+            /// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for minimizing properties)
+            /// Or that the pomdpLowerValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid.
            ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const {
                return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds);
            }
-
+ 
+            /// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for maximizing properties)
+            /// Or that the pomdpUpperValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid.
            ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const {
                 return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
            }
@ -507,7 +511,7 @@ namespace storm {
                    // Intentionally left empty.
                }
                
-                void join(SuccessorObservationInformation other) {
+                void join(SuccessorObservationInformation other) { /// Does not join support (for performance reasons)
                    observationProbability += other.observationProbability;
                    maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs);
                    successorWithObsCount += other.successorWithObsCount;
@ -515,17 +519,19 @@ namespace storm {
                
                ValueType observationProbability; /// The probability we move to the corresponding observation.
                ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation.
-                uint64_t successorWithObsCount; /// The number of successors with this observation
+                uint64_t successorWithObsCount; /// The number of successor beliefstates with this observation
+                typename BeliefManagerType::BeliefSupportType support;
            };
            
-            void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation> gatheredSuccessorObservations) {
+            void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) {
                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior");
                uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex;
                gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
+                
            }
            
-            void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation> gatheredSuccessorObservations) {
+            void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) {
                STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before");
                for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) {
                    auto const& beliefId = getBeliefId(entry.getColumn());
@ -537,6 +543,7 @@ namespace storm {
                            // There already is an entry for this observation, so join the two informations
                            obsInsertion.first->second.join(info);
                        }
+                        beliefManager->joinSupport(beliefId, obsInsertion.first->second.support);
                    }
                }
            }
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@ -283,18 +283,23 @@ namespace storm {
             */
            template<typename PomdpModelType, typename BeliefValueType>
            typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) {
-                auto n = storm::utility::convertNumber<ValueType>(info.successorWithObsCount);
+                auto n = storm::utility::convertNumber<ValueType, uint64_t>(info.support.size());
                auto one = storm::utility::one<ValueType>();
-                
-                // Create the rating for this observation at this choice from the given info
-                ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
-                // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1
-                // Normalize the rating so that it ranges from 0 to 1, where
-                // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
-                obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
-                // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
-                obsChoiceRating *= storm::utility::convertNumber<ValueType>(observationResolution) / storm::utility::convertNumber<ValueType>(maxResolution);
-                return obsChoiceRating;
+                if (storm::utility::isOne(n)) {
+                    // If the belief is Dirac, it has to be approximated precisely.
+                    // In this case, we return the best possible rating
+                    return one;
+                } else {
+                    // Create the rating for this observation at this choice from the given info
+                    ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
+                    // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1
+                    // Normalize the rating so that it ranges from 0 to 1, where
+                    // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
+                    obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
+                    // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
+                    obsChoiceRating *= storm::utility::convertNumber<ValueType>(observationResolution) / storm::utility::convertNumber<ValueType>(maxResolution);
+                    return obsChoiceRating;
+                }
            }
            
            template<typename PomdpModelType, typename BeliefValueType>
@ -487,6 +492,9 @@ namespace storm {
                                        STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
                                        // We did not explore this successor state. Get a bound on the "missing" value
                                        truncationProbability += successor.second;
+                                        // Some care has to be taken here: Essentially, we are triangulating a value for the under-approximation out of other
+                                        // under-approximation values. In general, this does not yield a sound underapproximation anymore.
+                                        // However, in our case this is still the case as the under-approximation values are based on a memoryless scheduler.
                                        truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first));
                                    }
                                }
--- a/src/storm-pomdp/storage/BeliefManager.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@ -3,6 +3,7 @@
 #include <unordered_map>
 #include <boost/optional.hpp>
 #include <boost/container/flat_map.hpp>
+#include <boost/container/flat_set.hpp>
 #include "storm/adapters/RationalNumberAdapter.h"
 #include "storm/utility/macros.h"
 #include "storm/exceptions/UnexpectedException.h"
@ -16,6 +17,7 @@ namespace storm {
            
            typedef typename PomdpType::ValueType ValueType;
            typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType; // iterating over this shall be ordered (for correct hash computation)
+            typedef boost::container::flat_set<StateType> BeliefSupportType;
            typedef uint64_t BeliefId;
            
            BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
@ -99,7 +101,7 @@ namespace storm {
            }
            
            uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
-                auto belief = getBelief(beliefId);
+                auto const& belief = getBelief(beliefId);
                return pomdp.getNumberOfChoices(belief.begin()->first);
            }
            
@ -115,6 +117,13 @@ namespace storm {
                }
            }
            
+            void joinSupport(BeliefId const& beliefId, BeliefSupportType& support) {
+                auto const& belief = getBelief(beliefId);
+                for (auto const& entry : belief) {
+                    support.insert(entry.first);
+                }
+            }
+            
            BeliefId getNumberOfBeliefIds() const {
                return beliefs.size();
            }