From d184d67b53e2c2a7b6ef5af67880b5e0e9746f52 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Sat, 28 Mar 2020 07:24:42 +0100
Subject: [PATCH] Refactored under-approximation code a bit.

---
 .../ApproximatePOMDPModelchecker.cpp          | 155 +++++++++++++++++-
 .../ApproximatePOMDPModelchecker.h            |   4 +
 src/storm-pomdp/storage/BeliefGrid.h          |  50 +++++-
 3 files changed, 199 insertions(+), 10 deletions(-)
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 146d1ff3e..4976f3404 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -23,7 +23,6 @@
 #include "storm/api/export.h"
 #include "storm-parsers/api/storm-parsers.h"
 
-#include "storm-pomdp/storage/BeliefGrid.h"
 
 #include "storm/utility/macros.h"
 #include "storm/utility/SignalHandler.h"
@@ -477,7 +476,7 @@ namespace storm {
                         std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
 
                         for (uint64_t action = 0; action < numChoices; ++action) {
-                            auto successorGridPoints = newBeliefGrid.expandAction(currId, action, observationResolutionVector);
+                            auto successorGridPoints = beliefGrid.expandAndTriangulate(currId, action, observationResolutionVector);
                             // Check for newly found grid points
                             foundBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false);
                             for (auto const& successor : successorGridPoints) {
@@ -1072,6 +1071,158 @@ namespace storm {
                 return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
             }
 
+            template<typename ValueType, typename RewardModelType>
+            std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
+            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>>& beliefGrid,
+                                                                                                std::set<uint32_t> const &targetObservations, bool min,
+                                                                                                bool computeRewards, uint64_t maxModelSize) {
+                // Build the belief MDP until enough states are explored.
+                //TODO think of other ways to stop exploration besides model size
+
+                statistics.underApproximationBuildTime.start();
+
+                // Reserve states 0 and 1 as always sink/goal states
+                storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
+                uint64_t extraBottomState = 0;
+                uint64_t extraTargetState = computeRewards ? 0 : 1;
+                uint64_t nextMdpStateId = extraTargetState + 1;
+                uint64_t mdpMatrixRow = 0;
+                for (uint64_t state = 0; state < nextMdpStateId; ++state) {
+                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
+                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
+                    ++mdpMatrixRow;
+                }
+                std::vector<uint64_t> targetStates = {extraTargetState};
+
+                bsmap_type beliefStateMap;
+                std::deque<uint64_t> beliefsToBeExpanded;
+                
+                auto initialBeliefId = beliefGrid.getInitialBelief();
+                beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, nextMdpStateId));
+                beliefsToBeExpanded.push_back(initialBeliefId);
+                ++nextMdpStateId;
+
+                // Expand the believes
+                storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false);
+                for (auto const& belId : beliefsToBeExpanded) {
+                    foundBeliefs.set(belId, true);
+                }
+                while (!beliefsToBeExpanded.empty()) {
+                    uint64_t currId = beliefsToBeExpanded.front();
+                    beliefsToBeExpanded.pop_front();
+                    
+                    uint64_t currMdpState = beliefStateMap.left.at(currId);
+                    auto const& currBelief = beliefGrid.getGridPoint(currId);
+                    uint32_t currObservation = beliefGrid.getBeliefObservation(currBelief);
+                    
+                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
+                    
+                    if (targetObservations.count(currObservation) != 0) {
+                        // Make this state absorbing
+                        targetStates.push_back(currMdpState);
+                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
+                        ++mdpMatrixRow;
+                    } else if (currMdpState > maxModelSize) {
+                        // In other cases, this could be helpflull as well.
+                        if (min) {
+                            // Get an upper bound here
+                            if (computeRewards) {
+                                // TODO: With minimizing rewards we need an upper bound!
+                                // For now, add a selfloop to "generate" infinite reward
+                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
+                            } else {
+                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, storm::utility::one<ValueType>());
+                            }
+                        } else {
+                            mdpTransitionsBuilder.addNextValue(mdpMatrixRow, computeRewards ? extraTargetState : extraBottomState, storm::utility::one<ValueType>());
+                        }
+                        ++mdpMatrixRow;
+                    } else {
+                        // Iterate over all actions and add the corresponding transitions
+                        uint64_t someState = currBelief.begin()->first;
+                        uint64_t numChoices = pomdp.getNumberOfChoices(someState);
+                        for (uint64_t action = 0; action < numChoices; ++action) {
+                            auto successorBeliefs = beliefGrid.expand(currId, action);
+                            // Check for newly found beliefs
+                            foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false);
+                            for (auto const& successor : successorBeliefs) {
+                                auto successorId = successor.first;
+                                if (!foundBeliefs.get(successorId)) {
+                                    foundBeliefs.set(successorId);
+                                    beliefsToBeExpanded.push_back(successorId);
+                                    beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
+                                    ++nextMdpStateId;
+                                }
+                                auto successorMdpState = beliefStateMap.left.at(successorId);
+                                // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted.
+                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
+                            }
+                            ++mdpMatrixRow;
+                        }
+                    }
+                    if (storm::utility::resources::isTerminate()) {
+                        statistics.underApproximationBuildAborted = true;
+                        break;
+                    }
+                }
+                statistics.underApproximationStates = nextMdpStateId;
+                if (storm::utility::resources::isTerminate()) {
+                    statistics.underApproximationBuildTime.stop();
+                    return nullptr;
+                }
+                
+                storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
+                mdpLabeling.addLabel("init");
+                mdpLabeling.addLabel("target");
+                mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBeliefId));
+                for (auto targetState : targetStates) {
+                    mdpLabeling.addLabelToState("target", targetState);
+                }
+                
+                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
+                auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
+                if (computeRewards) {
+                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow));
+                    for (auto const &iter : beliefStateMap.left) {
+                        auto currentBelief = beliefGrid.getGridPoint(iter.first);
+                        auto representativeState = currentBelief.begin()->first;
+                        for (uint64_t action = 0; action < model->getNumberOfChoices(representativeState); ++action) {
+                            // Add the reward
+                            uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
+                            uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action));
+                            mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief));
+                        }
+                    }
+                    model->addRewardModel("default", mdpRewardModel);
+                    model->restrictRewardModels(std::set<std::string>({"default"}));
+                }
+
+                model->printModelInformationToStream(std::cout);
+                statistics.underApproximationBuildTime.stop();
+
+                std::string propertyString;
+                if (computeRewards) {
+                    propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
+                } else {
+                    propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
+                }
+                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
+                std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
+
+                statistics.underApproximationCheckTime.start();
+                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, false)));
+                statistics.underApproximationCheckTime.stop();
+                if (storm::utility::resources::isTerminate() && !res) {
+                    return nullptr;
+                }
+                STORM_LOG_ASSERT(res, "Result does not exist.");
+                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true)));
+                auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
+                auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)];
+
+                return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
+            }
+            
 
             template<typename ValueType, typename RewardModelType>
             storm::storage::SparseMatrix<ValueType>
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 4820ea380..32daa0876 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -3,6 +3,7 @@
 #include "storm/models/sparse/Pomdp.h"
 #include "storm/utility/logging.h"
 #include "storm-pomdp/storage/Belief.h"
+#include "storm-pomdp/storage/BeliefGrid.h"
 #include <boost/bimap.hpp>
 
 #include "storm/storage/jani/Property.h"
@@ -162,6 +163,9 @@ namespace storm {
                                                                                                              std::set<uint32_t> const &targetObservations,
                                                                                                              uint64_t initialBeliefId, bool min, bool computeReward,
                                                                                                              uint64_t maxModelSize);
+                std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>>& beliefGrid,
+                                                                                                             std::set<uint32_t> const &targetObservations, bool min, bool computeReward,
+                                                                                                             uint64_t maxModelSize);
 
                 /**
                  * Constructs the initial belief for the given POMDP
diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefGrid.h
index a75872ee4..48da1b85b 100644
--- a/src/storm-pomdp/storage/BeliefGrid.h
+++ b/src/storm-pomdp/storage/BeliefGrid.h
@@ -10,7 +10,8 @@
 namespace storm {
     namespace storage {
         
-        template <typename PomdpType, typename BeliefValueType, typename StateType = uint64_t>
+        template <typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType, typename StateType = uint64_t>
+        // TODO: Change name. This actually does not only manage grid points.
         class BeliefGrid {
         public:
             
@@ -137,6 +138,28 @@ namespace storm {
                 return true;
             }
             
+            BeliefId getInitialBelief() {
+                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
+                                 "POMDP contains more than one initial state");
+                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
+                                 "POMDP does not contain an initial state");
+                BeliefType belief;
+                belief[*pomdp.getInitialStates().begin()] = storm::utility::one<ValueType>();
+                
+                STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief.");
+                return getOrAddGridPointId(belief);
+            }
+            
+            uint32_t getBeliefObservation(BeliefType belief) {
+                STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief.");
+                return pomdp.getObservation(belief.begin()->first);
+            }
+            
+            uint32_t getBeliefObservation(BeliefId beliefId) {
+                return getBeliefObservation(getGridPoint(beliefId));
+            }
+            
+            
             Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
                 //TODO this can also be simplified using the sparse vector interpretation
                 //TODO Enable chaching for this method?
@@ -229,9 +252,9 @@ namespace storm {
                 return gridPoints.size();
             }
             
-            std::map<BeliefId, ValueType> expandAction(BeliefId const& gridPointId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
-                
+            std::map<BeliefId, ValueType> expandInternal(BeliefId const& gridPointId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
                 std::map<BeliefId, ValueType> destinations; // The belief ids should be ordered
+                // TODO: Does this make sense? It could be better to order them afterwards because now we rely on the fact that MDP states have the same order than their associated BeliefIds
                 
                 BeliefType gridPoint = getGridPoint(gridPointId);
                 
@@ -247,7 +270,7 @@ namespace storm {
                     }
                 }
                 
-                // Now for each successor observation we find and triangulate the successor belief
+                // Now for each successor observation we find and potentially triangulate the successor belief
                 for (auto const& successor : successorObs) {
                     BeliefType successorBelief;
                     for (auto const& pointEntry : gridPoint) {
@@ -261,9 +284,13 @@ namespace storm {
                     }
                     STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief.");
                     
-                    Triangulation triangulation = triangulateBelief(successorBelief, observationResolutions[successor.first]);
-                    for (size_t j = 0; j < triangulation.size(); ++j) {
-                        addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
+                    if (observationTriangulationResolutions) {
+                        Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.get()[successor.first]);
+                        for (size_t j = 0; j < triangulation.size(); ++j) {
+                            addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
+                        }
+                    } else {
+                        addToDistribution(destinations, getOrAddGridPointId(successorBelief), successor.second);
                     }
                 }
     
@@ -271,6 +298,14 @@ namespace storm {
                 
             }
             
+            std::map<BeliefId, ValueType> expandAndTriangulate(BeliefId const& gridPointId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
+                return expandInternal(gridPointId, actionIndex, observationResolutions);
+            }
+            
+            std::map<BeliefId, ValueType> expand(BeliefId const& gridPointId, uint64_t actionIndex) {
+                return expandInternal(gridPointId, actionIndex);
+            }
+            
         private:
             
             BeliefId getOrAddGridPointId(BeliefType const& gridPoint) {
@@ -284,7 +319,6 @@ namespace storm {
             }
             
             PomdpType const& pomdp;
-            uint64_t resolution;
             
             std::vector<BeliefType> gridPoints;
             std::map<BeliefType, BeliefId> gridPointToIdMap;