From 4ea452854fbce16a2316d82c0b310bedd7b339fb Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 22 Apr 2020 08:58:19 +0200
Subject: [PATCH] Fixes for scoring observations

---
 src/storm-pomdp/builder/BeliefMdpExplorer.h   | 17 +++++++----
 .../ApproximatePOMDPModelchecker.cpp          | 30 ++++++++++++-------
 src/storm-pomdp/storage/BeliefManager.h       | 11 ++++++-
 3 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index e5d233aec..5fb5bfe26 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -456,10 +456,14 @@ namespace storm {
                 return upperValueBounds[getCurrentMdpState()];
             }
 
+            /// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for minimizing properties)
+            /// Or that the pomdpLowerValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid.
             ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const {
                 return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds);
             }
-
+ 
+            /// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for maximizing properties)
+            /// Or that the pomdpUpperValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid.
             ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const {
                  return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
             }
@@ -507,7 +511,7 @@ namespace storm {
                     // Intentionally left empty.
                 }
                 
-                void join(SuccessorObservationInformation other) {
+                void join(SuccessorObservationInformation other) { /// Does not join support (for performance reasons)
                     observationProbability += other.observationProbability;
                     maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs);
                     successorWithObsCount += other.successorWithObsCount;
@@ -515,17 +519,19 @@ namespace storm {
                 
                 ValueType observationProbability; /// The probability we move to the corresponding observation.
                 ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation.
-                uint64_t successorWithObsCount; /// The number of successors with this observation
+                uint64_t successorWithObsCount; /// The number of successor beliefstates with this observation
+                typename BeliefManagerType::BeliefSupportType support;
             };
             
-            void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation> gatheredSuccessorObservations) {
+            void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior");
                 uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex;
                 gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
+                
             }
             
-            void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation> gatheredSuccessorObservations) {
+            void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) {
                 STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before");
                 for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) {
                     auto const& beliefId = getBeliefId(entry.getColumn());
@@ -537,6 +543,7 @@ namespace storm {
                             // There already is an entry for this observation, so join the two informations
                             obsInsertion.first->second.join(info);
                         }
+                        beliefManager->joinSupport(beliefId, obsInsertion.first->second.support);
                     }
                 }
             }
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 622e63512..b35ded61c 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -283,18 +283,23 @@ namespace storm {
              */
             template<typename PomdpModelType, typename BeliefValueType>
             typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) {
-                auto n = storm::utility::convertNumber<ValueType>(info.successorWithObsCount);
+                auto n = storm::utility::convertNumber<ValueType, uint64_t>(info.support.size());
                 auto one = storm::utility::one<ValueType>();
-                
-                // Create the rating for this observation at this choice from the given info
-                ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
-                // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1
-                // Normalize the rating so that it ranges from 0 to 1, where
-                // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
-                obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
-                // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
-                obsChoiceRating *= storm::utility::convertNumber<ValueType>(observationResolution) / storm::utility::convertNumber<ValueType>(maxResolution);
-                return obsChoiceRating;
+                if (storm::utility::isOne(n)) {
+                    // If the belief is Dirac, it has to be approximated precisely.
+                    // In this case, we return the best possible rating
+                    return one;
+                } else {
+                    // Create the rating for this observation at this choice from the given info
+                    ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
+                    // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1
+                    // Normalize the rating so that it ranges from 0 to 1, where
+                    // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
+                    obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
+                    // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
+                    obsChoiceRating *= storm::utility::convertNumber<ValueType>(observationResolution) / storm::utility::convertNumber<ValueType>(maxResolution);
+                    return obsChoiceRating;
+                }
             }
             
             template<typename PomdpModelType, typename BeliefValueType>
@@ -487,6 +492,9 @@ namespace storm {
                                         STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
                                         // We did not explore this successor state. Get a bound on the "missing" value
                                         truncationProbability += successor.second;
+                                        // Some care has to be taken here: Essentially, we are triangulating a value for the under-approximation out of other
+                                        // under-approximation values. In general, this does not yield a sound underapproximation anymore.
+                                        // However, in our case this is still the case as the under-approximation values are based on a memoryless scheduler.
                                         truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first));
                                     }
                                 }
diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h
index 25ec3a3d2..b01ad5358 100644
--- a/src/storm-pomdp/storage/BeliefManager.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@@ -3,6 +3,7 @@
 #include <unordered_map>
 #include <boost/optional.hpp>
 #include <boost/container/flat_map.hpp>
+#include <boost/container/flat_set.hpp>
 #include "storm/adapters/RationalNumberAdapter.h"
 #include "storm/utility/macros.h"
 #include "storm/exceptions/UnexpectedException.h"
@@ -16,6 +17,7 @@ namespace storm {
             
             typedef typename PomdpType::ValueType ValueType;
             typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType; // iterating over this shall be ordered (for correct hash computation)
+            typedef boost::container::flat_set<StateType> BeliefSupportType;
             typedef uint64_t BeliefId;
             
             BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
@@ -99,7 +101,7 @@ namespace storm {
             }
             
             uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
-                auto belief = getBelief(beliefId);
+                auto const& belief = getBelief(beliefId);
                 return pomdp.getNumberOfChoices(belief.begin()->first);
             }
             
@@ -115,6 +117,13 @@ namespace storm {
                 }
             }
             
+            void joinSupport(BeliefId const& beliefId, BeliefSupportType& support) {
+                auto const& belief = getBelief(beliefId);
+                for (auto const& entry : belief) {
+                    support.insert(entry.first);
+                }
+            }
+            
             BeliefId getNumberOfBeliefIds() const {
                 return beliefs.size();
             }