@@ -23,7 +23,6 @@
#include "storm/api/export.h"
#include "storm-parsers/api/storm-parsers.h"
#include "storm-pomdp/storage/BeliefGrid.h"
#include "storm/utility/macros.h"
#include "storm/utility/SignalHandler.h"
@@ -477,7 +476,7 @@ namespace storm {
    std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
    for (uint64_t action = 0; action < numChoices; ++action) {
-       auto successorGridPoints = newBeliefGrid.expandAction(currId, action, observationResolutionVector);
+       auto successorGridPoints = beliefGrid.expandAndTriangulate(currId, action, observationResolutionVector);
        // Check for newly found grid points
        foundBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false);
        for (auto const& successor : successorGridPoints) {
@@ -1072,6 +1071,158 @@ namespace storm {
    return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
}

template<typename ValueType, typename RewardModelType>
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>>& beliefGrid,
                                                                                    std::set<uint32_t> const &targetObservations, bool min,
                                                                                    bool computeRewards, uint64_t maxModelSize) {
    // Build the belief MDP until enough states are explored.
    // TODO: think of other ways to stop exploration besides model size
    statistics.underApproximationBuildTime.start();

    // Reserve states 0 and 1 as always sink/goal states
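    // With computeRewards, only one extra state (index 0) is reserved and used as the target;
    // otherwise state 0 is the bottom (sink) state and state 1 the target state. Each reserved
    // state gets a probability-1 self-loop below.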
    storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
    uint64_t extraBottomState = 0;
    uint64_t extraTargetState = computeRewards ? 0 : 1;
    uint64_t nextMdpStateId = extraTargetState + 1;
    uint64_t mdpMatrixRow = 0;
    for (uint64_t state = 0; state < nextMdpStateId; ++state) {
        mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
        ++mdpMatrixRow;
    }
    std::vector<uint64_t> targetStates = {extraTargetState};

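    // beliefStateMap links belief grid-point ids to belief-MDP state ids (and is used in both directions below).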
    bsmap_type beliefStateMap;
    std::deque<uint64_t> beliefsToBeExpanded;

    auto initialBeliefId = beliefGrid.getInitialBelief();
    beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, nextMdpStateId));
    beliefsToBeExpanded.push_back(initialBeliefId);
    ++nextMdpStateId;

    // Expand the beliefs
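    // foundBeliefs marks grid points that already have an MDP state, so each belief is enqueued
    // and expanded at most once; the deque yields a FIFO (breadth-first) exploration order.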
    storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false);
    for (auto const& belId : beliefsToBeExpanded) {
        foundBeliefs.set(belId, true);
    }
    while (!beliefsToBeExpanded.empty()) {
        uint64_t currId = beliefsToBeExpanded.front();
        beliefsToBeExpanded.pop_front();

        uint64_t currMdpState = beliefStateMap.left.at(currId);
        auto const& currBelief = beliefGrid.getGridPoint(currId);
        uint32_t currObservation = beliefGrid.getBeliefObservation(currBelief);

        mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);

        if (targetObservations.count(currObservation) != 0) {
            // Make this state absorbing
            targetStates.push_back(currMdpState);
            mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
            ++mdpMatrixRow;
        } else if (currMdpState > maxModelSize) {
            // In other cases, this could be helpful as well.
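            // Exploration is cut off at this state: it receives the most pessimistic value
            // (an upper bound when minimizing, probability 0 resp. no further reward when
            // maximizing) so that the result stays a valid under-approximation.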
            if (min) {
                // Get an upper bound here
                if (computeRewards) {
                    // TODO: With minimizing rewards we need an upper bound!
                    // For now, add a self-loop to "generate" infinite reward
                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
                } else {
                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, storm::utility::one<ValueType>());
                }
            } else {
                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, computeRewards ? extraTargetState : extraBottomState, storm::utility::one<ValueType>());
            }
            ++mdpMatrixRow;
        } else {
            // Iterate over all actions and add the corresponding transitions
            uint64_t someState = currBelief.begin()->first;
            uint64_t numChoices = pomdp.getNumberOfChoices(someState);
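            // The code assumes all states in a grid-point belief share the same observation and
            // hence the same set of enabled actions, so the choice count of an arbitrary
            // representative state applies to the whole belief. beliefGrid.expand(currId, action)
            // then yields the successor grid points as (grid-point id, probability) pairs.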
            for (uint64_t action = 0; action < numChoices; ++action) {
                auto successorBeliefs = beliefGrid.expand(currId, action);
                // Check for newly found beliefs
                foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false);
                for (auto const& successor : successorBeliefs) {
                    auto successorId = successor.first;
                    if (!foundBeliefs.get(successorId)) {
                        foundBeliefs.set(successorId);
                        beliefsToBeExpanded.push_back(successorId);
                        beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
                        ++nextMdpStateId;
                    }
                    auto successorMdpState = beliefStateMap.left.at(successorId);
                    // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successor beliefs are sorted.
                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
                }
                ++mdpMatrixRow;
            }
        }
        if (storm::utility::resources::isTerminate()) {
            statistics.underApproximationBuildAborted = true;
            break;
        }
    }
    statistics.underApproximationStates = nextMdpStateId;
    if (storm::utility::resources::isTerminate()) {
        statistics.underApproximationBuildTime.stop();
        return nullptr;
    }

    storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
    mdpLabeling.addLabel("init");
    mdpLabeling.addLabel("target");
    mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBeliefId));
    for (auto targetState : targetStates) {
        mdpLabeling.addLabelToState("target", targetState);
    }

    storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
    auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
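    // If rewards are requested, equip the belief MDP with a state-action reward model: each
    // choice presumably gets the belief-weighted immediate reward of the corresponding POMDP
    // action (getRewardAfterAction), looked up via a representative state of the belief.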
    if (computeRewards) {
        storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow));
        for (auto const& iter : beliefStateMap.left) {
            auto currentBelief = beliefGrid.getGridPoint(iter.first);
            auto representativeState = currentBelief.begin()->first;
            for (uint64_t action = 0; action < model->getNumberOfChoices(representativeState); ++action) {
                // Add the reward
                uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
                uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action));
                mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief));
            }
        }
        model->addRewardModel("default", mdpRewardModel);
        model->restrictRewardModels(std::set<std::string>({"default"}));
    }

    model->printModelInformationToStream(std::cout);
    statistics.underApproximationBuildTime.stop();

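    // Model check the finite belief MDP with the sparse engine; the value computed for the MDP
    // state of the initial belief is returned as the under-approximation.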
    std::string propertyString;
    if (computeRewards) {
        propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
    } else {
        propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
    }
    std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
    std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();

    statistics.underApproximationCheckTime.start();
    std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, false)));
    statistics.underApproximationCheckTime.stop();
    if (storm::utility::resources::isTerminate() && !res) {
        return nullptr;
    }
    STORM_LOG_ASSERT(res, "Result does not exist.");
    res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true)));
    auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
    auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)];

    return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
}

template<typename ValueType, typename RewardModelType>
storm::storage::SparseMatrix<ValueType>