Added BeliefMdpExplorer which does most of the work when exploring (triangulated Variants of) the BeliefMdp.

6 years ago · ab26b69435
4 changed files with 443 additions and 192 deletions
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@ -0,0 +1,354 @@
+#pragma once
+
+#include <memory>
+#include <vector>
+#include <deque>
+#include <map>
+#include <boost/optional.hpp>
+
+#include "storm/api/properties.h"
+#include "storm/api/verification.h"
+
+#include "storm/storage/BitVector.h"
+#include "storm/utility/macros.h"
+#include "storm-pomdp/storage/BeliefManager.h"
+#include "storm/utility/SignalHandler.h"
+
+namespace storm {
+    namespace builder {
+        template<typename PomdpType>
+        class BeliefMdpExplorer {
+        public:
+            typedef typename PomdpType::ValueType ValueType;
+            typedef storm::storage::BeliefManager<PomdpType> BeliefManagerType;
+            typedef typename BeliefManagerType::BeliefId BeliefId;
+            typedef uint64_t MdpStateType;
+            
+            BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) {
+                // Intentionally left empty
+            }
+
+            void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
+                // Reset data from potential previous explorations
+                mdpStateToBeliefIdMap.clear();
+                beliefIdToMdpStateMap.clear();
+                beliefIdsWithMdpState.clear();
+                beliefIdsWithMdpState.grow(beliefManager->getNumberOfBeliefIds(), false);
+                lowerValueBounds.clear();
+                upperValueBounds.clear();
+                values.clear();
+                mdpTransitionsBuilder = storm::storage::SparseMatrixBuilder<ValueType>(0, 0, 0, true, true);
+                currentRowCount = 0;
+                startOfCurrentRowGroup = 0;
+                mdpActionRewards.clear();
+                exploredMdp = nullptr;
+                
+                // Add some states with special treatment (if requested)
+                if (extraBottomStateValue) {
+                    extraBottomState = getCurrentNumberOfMdpStates();
+                    mdpStateToBeliefIdMap.push_back(beliefManager->noId());
+                    insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get());
+
+                    startOfCurrentRowGroup = currentRowCount;
+                    mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
+                    mdpTransitionsBuilder.addNextValue(currentRowCount, extraBottomState.get(), storm::utility::one<ValueType>());
+                    ++currentRowCount;
+                } else {
+                    extraBottomState = boost::none;
+                }
+                if (extraTargetStateValue) {
+                    extraTargetState = getCurrentNumberOfMdpStates();
+                    mdpStateToBeliefIdMap.push_back(beliefManager->noId());
+                    insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get());
+                    
+                    startOfCurrentRowGroup = currentRowCount;
+                    mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
+                    mdpTransitionsBuilder.addNextValue(currentRowCount, extraTargetState.get(), storm::utility::one<ValueType>());
+                    ++currentRowCount;
+                    
+                    targetStates.grow(getCurrentNumberOfMdpStates(), false);
+                    targetStates.set(extraTargetState.get(), true);
+                } else {
+                    extraTargetState = boost::none;
+                }
+                
+                // Set up the initial state.
+                initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
+            }
+    
+            bool hasUnexploredState() const {
+                return !beliefIdsToExplore.empty();
+            }
+    
+            BeliefId exploreNextState() {
+                // Set up the matrix builder
+                finishCurrentRow();
+                startOfCurrentRowGroup = currentRowCount;
+                mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
+                ++currentRowCount;
+                
+                // Pop from the queue.
+                auto result = beliefIdsToExplore.front();
+                beliefIdsToExplore.pop_front();
+                return result;
+            }
+            
+            void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero<ValueType>(), ValueType const& bottomStateValue = storm::utility::zero<ValueType>()) {
+                // We first insert the entries of the current row in a separate map.
+                // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
+                
+                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                if (!storm::utility::isZero(bottomStateValue)) {
+                    STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none.");
+                    internalAddTransition(row, extraBottomState.get(), bottomStateValue);
+                }
+                if (!storm::utility::isZero(targetStateValue)) {
+                    STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none.");
+                    internalAddTransition(row, extraTargetState.get(), targetStateValue);
+                }
+            }
+            
+            void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one<ValueType>()) {
+                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                internalAddTransition(row, getCurrentMdpState(), value);
+            }
+            
+            /*!
+             * Adds the next transition to the given successor belief
+             * @param localActionIndex
+             * @param transitionTarget
+             * @param value
+             * @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition.
+             * @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true.
+             */
+            bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) {
+                // We first insert the entries of the current row in a separate map.
+                // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
+                MdpStateType column;
+                if (ignoreNewBeliefs) {
+                    column = getMdpState(transitionTarget);
+                    if (column == noState()) {
+                        return false;
+                    }
+                } else {
+                    column = getOrAddMdpState(transitionTarget);
+                }
+                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                internalAddTransition(row, column, value);
+                return true;
+            }
+            
+            void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero<ValueType>()) {
+                if (currentRowCount >= mdpActionRewards.size()) {
+                    mdpActionRewards.resize(currentRowCount, storm::utility::zero<ValueType>());
+                }
+                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward;
+            }
+            
+            void setCurrentStateIsTarget() {
+                targetStates.grow(getCurrentNumberOfMdpStates(), false);
+                targetStates.set(getCurrentMdpState(), true);
+            }
+            
+            void setCurrentStateIsTruncated() {
+                truncatedStates.grow(getCurrentNumberOfMdpStates(), false);
+                truncatedStates.set(getCurrentMdpState(), true);
+            }
+            
+            void finishExploration() {
+                // Create the tranistion matrix
+                finishCurrentRow();
+                auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates());
+                
+                // Create a standard labeling
+                storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates());
+                mdpLabeling.addLabel("init");
+                mdpLabeling.addLabelToState("init", initialMdpState);
+                targetStates.resize(getCurrentNumberOfMdpStates(), false);
+                mdpLabeling.addLabel("target", std::move(targetStates));
+                truncatedStates.resize(getCurrentNumberOfMdpStates(), false);
+                mdpLabeling.addLabel("truncated", std::move(truncatedStates));
+                
+                // Create a standard reward model (if rewards are available)
+                std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels;
+                if (!mdpActionRewards.empty()) {
+                    mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
+                    mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards)));
+                }
+                
+                storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels));
+                exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
+            }
+            
+            std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const {
+                STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet.");
+                return exploredMdp;
+            }
+            
+            MdpStateType getCurrentNumberOfMdpStates() const {
+                return mdpStateToBeliefIdMap.size();
+            }
+    
+            MdpStateType getCurrentNumberOfMdpChoices() const {
+                return currentRowCount;
+            }
+
+            ValueType getLowerValueBoundAtCurrentState() const {
+                return lowerValueBounds[getCurrentMdpState()];
+            }
+
+            ValueType getUpperValueBoundAtCurrentState() const {
+                return upperValueBounds[getCurrentMdpState()];
+            }
+
+            ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const {
+                return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds);
+            }
+
+            ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const {
+                 return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
+            }
+            
+            std::vector<ValueType> const& computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) {
+                STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored");
+                auto property = createStandardProperty(dir, exploredMdp->hasRewardModel());
+                auto task = createStandardCheckTask(property);
+                
+                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(exploredMdp, task));
+                if (res) {
+                    values = std::move(res->asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
+                } else {
+                    STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!");
+                    STORM_LOG_ERROR("No result obtained while checking.");
+                }
+                return values;
+            }
+            
+            ValueType const& getComputedValueAtInitialState() const {
+                STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored.");
+                return values[exploredMdp->getInitialStates().getNextSetIndex(0)];
+            }
+            
+        private:
+            MdpStateType noState() const {
+                return std::numeric_limits<MdpStateType>::max();
+            }
+            
+            std::shared_ptr<storm::logic::Formula const> createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) {
+                std::string propertyString = computeRewards ? "R" : "P";
+                propertyString += storm::solver::minimize(dir) ? "min" : "max";
+                propertyString += "=? [F \"target\"]";
+                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
+                return storm::api::extractFormulasFromProperties(propertyVector).front();
+            }
+            
+            storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property) {
+                //Note: The property should not run out of scope after calling this because the task only stores the property by reference.
+                // Therefore, this method needs the property by reference (and not const reference)
+                auto task = storm::api::createTask<ValueType>(property, false);
+                auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
+                hint.setResultHint(values);
+                auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
+                task.setHint(hintPtr);
+                return task;
+            }
+            
+            MdpStateType getCurrentMdpState() const {
+                return mdpTransitionsBuilder.getCurrentRowGroupCount() - 1;
+            }
+            
+            MdpStateType getCurrentBeliefId() const {
+                return mdpStateToBeliefIdMap[getCurrentMdpState()];
+            }
+            
+            void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) {
+                // We first insert the entries of the current row in a separate map.
+                // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
+                STORM_LOG_ASSERT(row >= currentRowCount - 1, "Trying to insert in an already completed row.");
+                if (row >= currentRowCount) {
+                    // We are going to start a new row, so insert the entries of the old one
+                    finishCurrentRow();
+                    currentRowCount = row + 1;
+                }
+                STORM_LOG_ASSERT(mdpTransitionsBuilderCurrentRowEntries.count(column) == 0, "Trying to insert multiple transitions to the same state.");
+                mdpTransitionsBuilderCurrentRowEntries[column] = value;
+            }
+            
+            void finishCurrentRow() {
+                for (auto const& entry : mdpTransitionsBuilderCurrentRowEntries) {
+                    mdpTransitionsBuilder.addNextValue(currentRowCount - 1, entry.first, entry.second);
+                }
+                mdpTransitionsBuilderCurrentRowEntries.clear();
+            }
+            
+            MdpStateType getMdpState(BeliefId const& beliefId) const {
+                if (beliefId < beliefIdsWithMdpState.size() && beliefIdsWithMdpState.get(beliefId)) {
+                    return beliefIdToMdpStateMap.at(beliefId);
+                } else {
+                    return noState();
+                }
+            }
+            
+            void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) {
+                lowerValueBounds.push_back(lowerBound);
+                upperValueBounds.push_back(upperBound);
+                // Take the middle value as a hint
+                values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType, uint64_t>(2));
+                STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have different size then number of available states.");
+                STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size.");
+            }
+            
+            MdpStateType getOrAddMdpState(BeliefId const& beliefId) {
+                beliefIdsWithMdpState.grow(beliefId + 1, false);
+                if (beliefIdsWithMdpState.get(beliefId)) {
+                    return beliefIdToMdpStateMap[beliefId];
+                } else {
+                    // Add a new MDP state
+                    beliefIdsWithMdpState.set(beliefId, true);
+                    MdpStateType result = getCurrentNumberOfMdpStates();
+                    assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size());
+                    mdpStateToBeliefIdMap.push_back(beliefId);
+                    beliefIdToMdpStateMap[beliefId] = result;
+                    // This new belief needs exploration
+                    beliefIdsToExplore.push_back(beliefId);
+                    
+                    insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId));
+                    return result;
+                }
+            }
+            
+            // Belief state related information
+            std::shared_ptr<BeliefManagerType> beliefManager;
+            std::vector<BeliefId> mdpStateToBeliefIdMap;
+            std::map<BeliefId, MdpStateType> beliefIdToMdpStateMap;
+            storm::storage::BitVector beliefIdsWithMdpState;
+            
+            // Exploration information
+            std::deque<uint64_t> beliefIdsToExplore;
+            storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder;
+            std::map<MdpStateType, ValueType> mdpTransitionsBuilderCurrentRowEntries;
+            std::vector<ValueType> mdpActionRewards;
+            uint64_t startOfCurrentRowGroup;
+            uint64_t currentRowCount;
+            
+            // Special states during exploration
+            boost::optional<MdpStateType> extraTargetState;
+            boost::optional<MdpStateType> extraBottomState;
+            storm::storage::BitVector targetStates;
+            storm::storage::BitVector truncatedStates;
+            MdpStateType initialMdpState;
+
+            // Final Mdp
+            std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
+            
+            // Value related information
+            std::vector<ValueType> const& pomdpLowerValueBounds;
+            std::vector<ValueType> const& pomdpUpperValueBounds;
+            std::vector<ValueType> lowerValueBounds;
+            std::vector<ValueType> upperValueBounds;
+            std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has performed
+            
+        };
+    }
+}
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@ -22,7 +22,8 @@
 #include "storm/api/properties.h"
 #include "storm/api/export.h"
 #include "storm-parsers/api/storm-parsers.h"
-
+#include "storm-pomdp/builder/BeliefMdpExplorer.h"
+#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"

 #include "storm/utility/macros.h"
 #include "storm/utility/SignalHandler.h"
@ -57,6 +58,10 @@ namespace storm {
                std::unique_ptr<POMDPCheckResult<ValueType>> result;
                // Extract the relevant information from the formula
                auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula);
+                
+                // Compute some initial bounds on the values for each state of the pomdp
+                auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
+                
                if (formulaInfo.isNonNestedReachabilityProbability()) {
                    // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
                    STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
@ -68,7 +73,7 @@ namespace storm {
                    if (options.doRefinement) {
                        result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
                    } else {
-                        result = computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+                        result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
                    }
                } else if (formulaInfo.isNonNestedExpectedRewardFormula()) {
                    // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
@ -78,7 +83,7 @@ namespace storm {
                    } else {
                        // FIXME: pick the non-unique reward model here
                        STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet.");
-                        result = computeReachabilityRewardOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+                        result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
                    }
                } else {
                    STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
@ -233,8 +238,8 @@ namespace storm {
                uint64_t refinementCounter = 1;
                STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl)
                std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
-                                                                                                  initialOverApproxMap,
-                                                                                                  initialUnderApproxMap, underApproxModelSize);
+                                                                                                  {},
+                                                                                                  {}, underApproxModelSize);
                if (res == nullptr) {
                    statistics.refinementSteps = 0;
                    return nullptr;
@ -335,14 +340,14 @@ namespace storm {
            template<typename ValueType, typename RewardModelType>
            std::unique_ptr<POMDPCheckResult<ValueType>>
            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
-                                                                                             std::vector<uint64_t> &observationResolutionVector,
                                                                                             bool computeRewards,
-                                                                                             boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-                                                                                             boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
+                                                                                             std::vector<ValueType> const& lowerPomdpValueBounds,
+                                                                                             std::vector<ValueType> const& upperPomdpValueBounds,
                                                                                             uint64_t maxUaModelSize) {
                STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
-                auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, overApproximationMap,
-                                                         underApproximationMap, maxUaModelSize);
+                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
+                auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, lowerPomdpValueBounds,
+                                                         upperPomdpValueBounds, maxUaModelSize);
                if (result == nullptr) {
                    return nullptr;
                }
@ -353,8 +358,6 @@ namespace storm {
                }
            }

-
-            
            template <typename ValueType, typename BeliefType, typename SummandsType>
            ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
                ValueType result = storm::utility::zero<ValueType>();
@ -369,155 +372,64 @@ namespace storm {
            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
                                                                                                 std::vector<uint64_t> &observationResolutionVector,
                                                                                                 bool computeRewards,
-                                                                                                 boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-                                                                                                 boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
+                                                                                                 std::vector<ValueType> const& lowerPomdpValueBounds,
+                                                                                                 std::vector<ValueType> const& upperPomdpValueBounds,
                                                                                                 uint64_t maxUaModelSize) {
-                bool boundMapsSet = overApproximationMap && underApproximationMap;
-                std::map<uint64_t, ValueType> overMap;
-                std::map<uint64_t, ValueType> underMap;
-                if (boundMapsSet) {
-                    overMap = overApproximationMap.value();
-                    underMap = underApproximationMap.value();
-                }
-
+                
                auto beliefManager = std::make_shared<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>>(pomdp, options.numericPrecision);
                if (computeRewards) {
                    beliefManager->setRewardModel(); // TODO: get actual name
                }
                
-                bsmap_type beliefStateMap;
-
-                std::deque<uint64_t> beliefsToBeExpanded;
-
                statistics.overApproximationBuildTime.start();
-                // Initial belief always has belief ID 0
-                auto initialObservation = beliefManager->getBeliefObservation(beliefManager->getInitialBelief());
-                // These are the components to build the MDP from the grid
-                // Reserve states 0 and 1 as always sink/goal states
-                storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
-                uint64_t extraBottomState = 0;
-                uint64_t extraTargetState = computeRewards ? 0 : 1;
-                uint64_t nextMdpStateId = extraTargetState + 1;
-                uint64_t mdpMatrixRow = 0;
-                for (uint64_t state = 0; state < nextMdpStateId; ++state) {
-                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
-                    ++mdpMatrixRow;
-                }
-                // Hint vector for the MDP modelchecker (initialize with constant sink/goal values)
-                std::vector<ValueType> hintVector(nextMdpStateId, storm::utility::zero<ValueType>());
-                if (!computeRewards) {
-                    hintVector[extraTargetState] = storm::utility::one<ValueType>();
-                }
-                std::vector<uint64_t> targetStates = {extraTargetState};
-                storm::storage::BitVector fullyExpandedStates;
-
-                // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace
-                std::map<uint64_t, ValueType> weightedSumOverMap;
-                std::map<uint64_t, ValueType> weightedSumUnderMap;
-
-                // for the initial belief, add the triangulated initial states
-                auto triangulation = beliefManager->triangulateBelief(beliefManager->getInitialBelief(), observationResolutionVector[initialObservation]);
-                uint64_t initialMdpState = nextMdpStateId;
-                ++nextMdpStateId;
-                if (triangulation.size() == 1) {
-                    // The initial belief is on the grid itself
-                    auto initBeliefId = triangulation.gridPoints.front();
-                    if (boundMapsSet) {
-                        auto const& gridPoint = beliefManager->getBelief(initBeliefId);
-                        weightedSumOverMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, overMap);
-                        weightedSumUnderMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, underMap);
-                    }
-                    beliefsToBeExpanded.push_back(initBeliefId);
-                    beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState));
-                    hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one<ValueType>()
-                                                                                                                                    : storm::utility::zero<ValueType>());
+                storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+                if (computeRewards) {
+                    explorer.startNewExploration(storm::utility::zero<ValueType>());
                } else {
-                    // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
-                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-                    for (uint64_t i = 0; i < triangulation.size(); ++i) {
-                        beliefsToBeExpanded.push_back(triangulation.gridPoints[i]);
-                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]);
-                        beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId));
-                        ++nextMdpStateId;
-                        if (boundMapsSet) {
-                            auto const& gridPoint = beliefManager->getBelief(triangulation.gridPoints[i]);
-                            weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, overMap);
-                            weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, underMap);
-                        }
-                        hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one<ValueType>()
-                                                                                                                                    : storm::utility::zero<ValueType>());
-                    }
-                    //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
-                    ++mdpMatrixRow;
+                    explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
                }
                
                // Expand the beliefs to generate the grid on-the-fly
                if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
                    STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
                }
-                storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false);
-                for (auto const& belId : beliefsToBeExpanded) {
-                    foundBeliefs.set(belId, true);
-                }
-                while (!beliefsToBeExpanded.empty()) {
-                    uint64_t currId = beliefsToBeExpanded.front();
-                    beliefsToBeExpanded.pop_front();
+                while (explorer.hasUnexploredState()) {
+                    uint64_t currId = explorer.exploreNextState();
                    
-                    uint64_t currMdpState = beliefStateMap.left.at(currId);
                    uint32_t currObservation = beliefManager->getBeliefObservation(currId);
-
-                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-                    
                    if (targetObservations.count(currObservation) != 0) {
-                        // Make this state absorbing
-                        targetStates.push_back(currMdpState);
-                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
-                        ++mdpMatrixRow;
-                    } else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
-                        // TODO: with rewards we would have to assign the corresponding reward to this transition
-                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]);
-                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one<ValueType>() - weightedSumOverMap[currId]);
-                        ++mdpMatrixRow;
+                        explorer.setCurrentStateIsTarget();
+                        explorer.addSelfloopTransition();
                    } else {
-                        fullyExpandedStates.grow(nextMdpStateId, false);
-                        fullyExpandedStates.set(currMdpState, true);
-                        uint64_t someState = beliefManager->getBelief(currId).begin()->first;
-                        uint64_t numChoices = pomdp.getNumberOfChoices(someState);
-
-                        for (uint64_t action = 0; action < numChoices; ++action) {
+                        bool stopExploration = false;
+                        if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
+                            stopExploration = true;
+                            explorer.setCurrentStateIsTruncated();
+                        }
+                        for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
+                            ValueType truncationProbability = storm::utility::zero<ValueType>();
+                            ValueType truncationValueBound = storm::utility::zero<ValueType>();
                            auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
-                            // Check for newly found grid points
-                            foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false);
                            for (auto const& successor : successorGridPoints) {
-                                auto successorId = successor.first;
-                                auto const& successorBelief = beliefManager->getBelief(successorId);
-                                auto successorObservation = beliefManager->getBeliefObservation(successorBelief);
-                                if (!foundBeliefs.get(successorId)) {
-                                    foundBeliefs.set(successorId);
-                                    beliefsToBeExpanded.push_back(successorId);
-                                    beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
-                                    ++nextMdpStateId;
-                                    
-                                    if (boundMapsSet) {
-                                        ValueType upperBound = getWeightedSum<ValueType>(successorBelief, overMap);
-                                        ValueType lowerBound = getWeightedSum<ValueType>(successorBelief, underMap);
-                                        if (cc.isEqual(upperBound, lowerBound)) {
-                                            hintVector.push_back(lowerBound);
-                                        } else {
-                                            hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
-                                        }
-                                        weightedSumOverMap[successorId] = upperBound;
-                                        weightedSumUnderMap[successorId] = lowerBound;
-                                    } else {
-                                        hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
-                                    }
+                                bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
+                                if (!added) {
+                                    STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
+                                    // We did not explore this successor state. Get a bound on the "missing" value
+                                    truncationProbability += successor.second;
+                                    truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first));
                                }
-                                auto successorMdpState = beliefStateMap.left.at(successorId);
-                                // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted.
-                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
                            }
-                            ++mdpMatrixRow;
+                            if (stopExploration) {
+                                if (computeRewards) {
+                                    explorer.addTransitionsToExtraStates(action, truncationProbability);
+                                } else {
+                                    explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
+                                }
+                            }
+                            if (computeRewards) {
+                                // The truncationValueBound will be added on top of the reward introduced by the current belief state.
+                                explorer.computeRewardAtCurrentState(action, truncationValueBound);
+                            }
                        }
                    }
                    if (storm::utility::resources::isTerminate()) {
@ -525,64 +437,30 @@ namespace storm {
                        break;
                    }
                }
-                statistics.overApproximationStates = nextMdpStateId;
-                STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
+                statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates();
                if (storm::utility::resources::isTerminate()) {
                    statistics.overApproximationBuildTime.stop();
                    return nullptr;
                }
-                fullyExpandedStates.resize(nextMdpStateId, false);
                
-                storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
-                mdpLabeling.addLabel("init");
-                mdpLabeling.addLabel("target");
-                mdpLabeling.addLabelToState("init", initialMdpState);
-                for (auto targetState : targetStates) {
-                    mdpLabeling.addLabelToState("target", targetState);
-                }
-                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
-                auto overApproxMdp = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
-                if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
-                    for (auto const &iter : beliefStateMap.left) {
-                        if (fullyExpandedStates.get(iter.second)) {
-                            auto const& currentBelief = beliefManager->getBelief(iter.first);
-                            auto representativeState = currentBelief.begin()->first;
-                            for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
-                                uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                                mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
-                            }
-                        }
-                    }
-                    overApproxMdp->addRewardModel("default", mdpRewardModel);
-                }
+                explorer.finishExploration();
                statistics.overApproximationBuildTime.stop();
                STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
-                overApproxMdp->printModelInformationToStream(std::cout);
-
-                auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(overApproxMdp);
-                auto property = createStandardProperty(min, computeRewards);
-                auto task = createStandardCheckTask(property, std::move(hintVector));
+                explorer.getExploredMdp()->printModelInformationToStream(std::cout);

                statistics.overApproximationCheckTime.start();
-                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(overApproxMdp, task));
+                explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                statistics.overApproximationCheckTime.stop();
-                if (storm::utility::resources::isTerminate() && !res) {
-                    return nullptr;
-                }
-                STORM_LOG_ASSERT(res, "Result does not exist.");
-                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true)));
-                auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                auto overApprox = overApproxResultMap[initialMdpState];

                STORM_PRINT("Time Overapproximation: " <<  statistics.overApproximationCheckTime << " seconds." << std::endl);
-                STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
+                STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
+                
                //auto underApprox = weightedSumUnderMap[initialBelief.id];
                auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize);
                if (storm::utility::resources::isTerminate() && !underApproxComponents) {
                    // TODO: return other components needed for refinement.
                    //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
-                    return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
+                    //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
                }
                
                STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
@ -592,8 +470,8 @@ namespace storm {
                                                        underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
                                                        underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
                */
-                return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
-                                                                                                         underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
+                return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {},
+                                                                                                         underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});

            }

@ -930,14 +808,14 @@ namespace storm {
            std::unique_ptr<POMDPCheckResult<ValueType>>
            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min) {
                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-                return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
+              //  return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
            }

            template<typename ValueType, typename RewardModelType>
            std::unique_ptr<POMDPCheckResult<ValueType>>
            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min) {
                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-                return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
+              //  return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
            }
            
            
@ -1191,7 +1069,7 @@ namespace storm {
                            auto representativeState = currentBelief.begin()->first;
                            for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
                                uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                                mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
+                                mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(iter.first, action));
                            }
                        }
                    }
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@ -117,8 +117,7 @@ namespace storm {
                 */
                std::shared_ptr<RefinementComponents<ValueType>>
                computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
-                                           bool computeRewards, boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-                                           boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
+                                           bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);

                std::shared_ptr<RefinementComponents<ValueType>>
                computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
@ -140,10 +139,8 @@ namespace storm {
                 * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
                 */
                std::unique_ptr<POMDPCheckResult<ValueType>>
-                computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
-                                       std::vector<uint64_t> &observationResolutionVector, bool computeRewards,
-                                       boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-                                       boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
+                computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards,
+                                       std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);

                /**
                 * Helper to compute an underapproximation of the reachability property.
--- a/src/storm-pomdp/storage/BeliefManager.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@ -45,6 +45,8 @@ namespace storm {
            };
            
            BeliefType const& getBelief(BeliefId const& id) const {
+                STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existend belief.");
+                STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range.");
                return beliefs[id];
            }
            
@ -54,6 +56,10 @@ namespace storm {
                return idIt->second;
            }
            
+            BeliefId noId() const {
+                return std::numeric_limits<BeliefId>::max();
+            }
+            
            std::string toString(BeliefType const& belief) const {
                std::stringstream str;
                str << "{ ";
@ -180,11 +186,22 @@ namespace storm {
                return true;
            }
            
+            template <typename SummandsType>
+            ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) {
+                ValueType result = storm::utility::zero<ValueType>();
+                for (auto const& entry : getBelief(beliefId)) {
+                    result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
+                }
+                return result;
+            }
+            
+            
            BeliefId const& getInitialBelief() const {
                return initialBeliefId;
            }
            
-            ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const {
+            ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const {
+                auto const& belief = getBelief(beliefId);
                STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified.");
                auto result = storm::utility::zero<ValueType>();
                auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
@ -206,6 +223,11 @@ namespace storm {
                return getBeliefObservation(getBelief(beliefId));
            }
            
+            uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
+                auto belief = getBelief(beliefId);
+                return pomdp.getNumberOfChoices(belief.begin()->first);
+            }
+            

            Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
                //TODO this can also be simplified using the sparse vector interpretation