Storing the observation resolutions as a float so that we can increase the resolution more accurately with a non-integer factor

6 years ago · 55c4408c6a
3 changed files with 40 additions and 41 deletions
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@ -191,7 +191,7 @@ namespace storm {
            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result) {
                
                if (options.discretize) {
-                    std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.resolutionInit);
+                    std::vector<BeliefValueType> observationResolutionVector(pomdp.getNrObservations(), storm::utility::convertNumber<BeliefValueType>(options.resolutionInit));
                    auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
                    if (rewardModelName) {
                        manager->setRewardModel(rewardModelName);
@ -244,12 +244,12 @@ namespace storm {
                statistics.refinementSteps = 0;

                // Set up exploration data
-                std::vector<uint64_t> observationResolutionVector;
+                std::vector<BeliefValueType> observationResolutionVector;
                std::shared_ptr<BeliefManagerType> overApproxBeliefManager;
                std::shared_ptr<ExplorerType> overApproximation;
                HeuristicParameters overApproxHeuristicPar;
                if (options.discretize) { // Setup and build first OverApproximation
-                    observationResolutionVector = std::vector<uint64_t>(pomdp.getNrObservations(), options.resolutionInit);
+                    observationResolutionVector = std::vector<BeliefValueType>(pomdp.getNrObservations(), storm::utility::convertNumber<BeliefValueType>(options.resolutionInit));
                    overApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
                    if (rewardModelName) {
                        overApproxBeliefManager->setRewardModel(rewardModelName);
@ -404,32 +404,33 @@ namespace storm {
             * Here, 0 means a bad approximation and 1 means a good approximation.
             */
            template<typename PomdpModelType, typename BeliefValueType>
-            typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) {
-                auto n = storm::utility::convertNumber<ValueType, uint64_t>(info.support.size());
-                auto one = storm::utility::one<ValueType>();
+            BeliefValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution) {
+                auto n = storm::utility::convertNumber<BeliefValueType, uint64_t>(info.support.size());
+                auto one = storm::utility::one<BeliefValueType>();
                if (storm::utility::isOne(n)) {
                    // If the belief is Dirac, it has to be approximated precisely.
                    // In this case, we return the best possible rating
                    return one;
                } else {
                    // Create the rating for this observation at this choice from the given info
-                    ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
+                    BeliefValueType obsChoiceRating = storm::utility::convertNumber<BeliefValueType, ValueType>(info.maxProbabilityToSuccessorWithObs / info.observationProbability);
                    // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1
                    // Normalize the rating so that it ranges from 0 to 1, where
                    // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
                    obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
                    // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
-                    obsChoiceRating *= storm::utility::convertNumber<ValueType>(observationResolution) / storm::utility::convertNumber<ValueType>(maxResolution);
+                    obsChoiceRating *= observationResolution / maxResolution;
                    return obsChoiceRating;
                }
            }
            
            template<typename PomdpModelType, typename BeliefValueType>
-            std::vector<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<uint64_t> const& observationResolutionVector, uint64_t const& maxResolution) {
+            std::vector<BeliefValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<BeliefValueType> const& observationResolutionVector) {
                uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates();
                auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices();
+                BeliefValueType maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());

-                std::vector<ValueType> resultingRatings(pomdp.getNrObservations(), storm::utility::one<ValueType>());
+                std::vector<BeliefValueType> resultingRatings(pomdp.getNrObservations(), storm::utility::one<BeliefValueType>());
                
                std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
                for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) {
@ -444,7 +445,7 @@ namespace storm {
                                overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
                                for (auto const& obsInfo : gatheredSuccessorObservations) {
                                    auto const& obs = obsInfo.first;
-                                    ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
+                                    BeliefValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
             
                                    // The rating of the observation will be the minimum over all choice-based observation ratings
                                    resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
@ -476,14 +477,11 @@ namespace storm {
            }
            
            template<typename PomdpModelType, typename BeliefValueType>
-            bool ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
+            bool ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<BeliefValueType>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
                
                // Detect whether the refinement reached a fixpoint.
                bool fixPoint = true;
                
-                // current maximal resolution (needed for refinement heuristic)
-                uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
-
                statistics.overApproximationBuildTime.start();
                storm::storage::BitVector refinedObservations;
                if (!refine) {
@ -497,7 +495,8 @@ namespace storm {
                    // If we refine the existing overApproximation, our heuristic also wants to know which states are reachable under an optimal policy
                    overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true);
                    // We also need to find out which observation resolutions needs refinement.
-                    auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution);
+                    // current maximal resolution (needed for refinement heuristic)
+                    auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector);
                    // If there is a score < 1, we have not reached a fixpoint, yet
                    if (std::any_of(obsRatings.begin(), obsRatings.end(), [](ValueType const& value){return value < storm::utility::one<ValueType>();})) {
                        fixPoint = false;
@ -506,17 +505,17 @@ namespace storm {
                    STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
                    for (auto const& obs : refinedObservations) {
                        // Increment the resolution at the refined observations.
-                        // Detect overflows properly.
+                        // Use storm's rational number to detect overflows properly.
                        storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber<storm::RationalNumber>(observationResolutionVector[obs]) * storm::utility::convertNumber<storm::RationalNumber>(options.resolutionFactor);
-                        if (newObsResolutionAsRational > storm::utility::convertNumber<storm::RationalNumber>(std::numeric_limits<uint64_t>::max())) {
-                            observationResolutionVector[obs] = std::numeric_limits<uint64_t>::max();
+                        if (newObsResolutionAsRational > storm::utility::convertNumber<storm::RationalNumber, uint64_t>(std::numeric_limits<uint32_t>::max())) {
+                            observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType, uint64_t>(std::numeric_limits<uint32_t>::max());
                        } else {
-                            observationResolutionVector[obs] = storm::utility::convertNumber<uint64_t>(newObsResolutionAsRational);
+                            observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(newObsResolutionAsRational);
                        }
                    }
                    overApproximation->restartExploration();
                }
-                statistics.overApproximationMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
+                statistics.overApproximationMaxResolution = storm::utility::convertNumber<uint64_t>(storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end())));
                
                // Start exploration
                storm::utility::Stopwatch explorationTime;
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@ -49,7 +49,6 @@ namespace storm {
                 *
                 * @param targetObservations set of target observations
                 * @param min true if minimum value is to be computed
-                 * @param observationResolutionVector vector containing the resolution to be used for each observation
                 * @param computeRewards true if rewards are to be computed, false if probability is computed
                 * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
                 * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
@ -79,7 +78,7 @@ namespace storm {
                 * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties
                 * Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp)
                 */
-                bool buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
+                bool buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<BeliefValueType>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);

                /**
                 * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties
@ -87,9 +86,9 @@ namespace storm {
                 */
                bool buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);

-                ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution);
+                BeliefValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution);
                
-                std::vector<ValueType> getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<uint64_t> const& observationResolutionVector, uint64_t const& maxResolution);
+                std::vector<BeliefValueType> getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<BeliefValueType> const& observationResolutionVector);
                
                struct Statistics {
                    Statistics();
--- a/src/storm-pomdp/storage/BeliefManager.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@ -111,7 +111,7 @@ namespace storm {
                return pomdp.getNumberOfChoices(belief.begin()->first);
            }
            
-            Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) {
+            Triangulation triangulateBelief(BeliefId beliefId, BeliefValueType resolution) {
                return triangulateBelief(getBelief(beliefId), resolution);
            }
            
@ -134,7 +134,7 @@ namespace storm {
                return beliefs.size();
            }
            
-            std::vector<std::pair<BeliefId, ValueType>> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
+            std::vector<std::pair<BeliefId, ValueType>> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<BeliefValueType> const& observationResolutions) {
                return expandInternal(beliefId, actionIndex, observationResolutions);
            }
            
@ -293,10 +293,10 @@ namespace storm {
                }
            };
            
-            void triangulateBeliefFreudenthal(BeliefType const& belief, uint64_t const& resolution, Triangulation& result) {
+            void triangulateBeliefFreudenthal(BeliefType const& belief, BeliefValueType const& resolution, Triangulation& result) {
                STORM_LOG_ASSERT(resolution != 0, "Invalid resolution: 0");
+                STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution");
                StateType numEntries = belief.size();
-                auto convResolution = storm::utility::convertNumber<BeliefValueType>(resolution);
                // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
                // Probabilities will be triangulated to values in 0/N, 1/N, 2/N, ..., N/N
                // Variable names are mostly based on the paper
@ -307,12 +307,12 @@ namespace storm {
                qsRow.reserve(numEntries);
                std::vector<StateType> toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices
                toOriginalIndicesMap.reserve(numEntries);
-                BeliefValueType x = convResolution;
+                BeliefValueType x = resolution;
                for (auto const& entry : belief) {
                    qsRow.push_back(storm::utility::floor(x)); // v
                    sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v
                    toOriginalIndicesMap.push_back(entry.first);
-                    x -= entry.second * convResolution;
+                    x -= entry.second * resolution;
                }
                // Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler
                qsRow.push_back(storm::utility::zero<BeliefValueType>());
@ -339,7 +339,7 @@ namespace storm {
                        for (StateType j = 0; j < numEntries; ++j) {
                            BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1];
                            if (!cc.isZero(gridPointEntry)) {
-                                gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / convResolution;
+                                gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / resolution;
                            }
                        }
                        result.gridPoints.push_back(getOrAddBeliefId(gridPoint));
@ -348,17 +348,17 @@ namespace storm {
                }
            }
            
-            void triangulateBeliefDynamic(BeliefType const& belief, uint64_t const& resolution, Triangulation& result) {
+            void triangulateBeliefDynamic(BeliefType const& belief, BeliefValueType const& resolution, Triangulation& result) {
                // Find the best resolution for this belief, i.e., N such that the largest distance between one of the belief values to a value in {i/N | 0 ≤ i ≤ N} is minimal
-                uint64_t finalResolution = resolution;
+                STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution");
+                BeliefValueType finalResolution = resolution;
                uint64_t finalResolutionMisses = belief.size() + 1;
                // We don't need to check resolutions that are smaller than the maximal resolution divided by 2 (as we already checked multiples of these)
-                for (uint64_t currResolution = resolution; currResolution > resolution / 2; --currResolution) {
+                for (BeliefValueType currResolution = resolution; currResolution > resolution / 2; --currResolution) {
                    uint64_t currResMisses = 0;
-                    BeliefValueType currResolutionConverted = storm::utility::convertNumber<BeliefValueType>(currResolution);
                    bool continueWithNextResolution = false;
                    for (auto const& belEntry : belief) {
-                        BeliefValueType product = belEntry.second * currResolutionConverted;
+                        BeliefValueType product = belEntry.second * currResolution;
                        if (!cc.isZero(product - storm::utility::round(product))) {
                            ++currResMisses;
                            if (currResMisses >= finalResolutionMisses) {
@ -384,7 +384,7 @@ namespace storm {
                triangulateBeliefFreudenthal(belief, finalResolution, result);
            }
            
-            Triangulation triangulateBelief(BeliefType const& belief, uint64_t const& resolution) {
+            Triangulation triangulateBelief(BeliefType const& belief, BeliefValueType const& resolution) {
                STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid.");
                Triangulation result;
                // Quickly triangulate Dirac beliefs
@ -392,12 +392,13 @@ namespace storm {
                    result.weights.push_back(storm::utility::one<BeliefValueType>());
                    result.gridPoints.push_back(getOrAddBeliefId(belief));
                } else {
+                    auto ceiledResolution = storm::utility::ceil<BeliefValueType>(resolution);
                    switch (triangulationMode) {
                        case TriangulationMode::Static:
-                            triangulateBeliefFreudenthal(belief, resolution, result);
+                            triangulateBeliefFreudenthal(belief, ceiledResolution, result);
                            break;
                        case TriangulationMode::Dynamic:
-                            triangulateBeliefDynamic(belief, resolution, result);
+                            triangulateBeliefDynamic(belief, ceiledResolution, result);
                            break;
                        default:
                            STORM_LOG_ASSERT(false, "Invalid triangulation mode.");
@ -407,7 +408,7 @@ namespace storm {
                return result;
            }
            
-            std::vector<std::pair<BeliefId, ValueType>> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
+            std::vector<std::pair<BeliefId, ValueType>> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<BeliefValueType>> const& observationTriangulationResolutions = boost::none) {
                std::vector<std::pair<BeliefId, ValueType>> destinations;
                
                BeliefType belief = getBelief(beliefId);