
Added BeliefMdpExplorer which does most of the work when exploring (triangulated Variants of) the BeliefMdp.

main
Tim Quatmann committed 5 years ago
commit ab26b69435
  1. src/storm-pomdp/builder/BeliefMdpExplorer.h (354 changed lines)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (248 changed lines)
  3. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (9 changed lines)
  4. src/storm-pomdp/storage/BeliefManager.h (24 changed lines)
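
Before the file-by-file diff, here is the intended call sequence of the new explorer, as a sketch mirroring its use in computeFirstRefinementStep below. PomdpType, isTargetObservation, and resolutions are placeholders for caller-provided pieces and are not part of the new class:

// Sketch only: mirrors the usage in computeFirstRefinementStep below.
// 'PomdpType', 'isTargetObservation', and 'resolutions' are placeholders.
storm::builder::BeliefMdpExplorer<PomdpType> explorer(beliefManager, lowerBounds, upperBounds);
explorer.startNewExploration(); // for rewards, pass a value for the extra target state
while (explorer.hasUnexploredState()) {
    auto beliefId = explorer.exploreNextState();
    if (isTargetObservation(beliefManager->getBeliefObservation(beliefId))) {
        explorer.setCurrentStateIsTarget();
        explorer.addSelfloopTransition();
    } else {
        uint64_t numActions = beliefManager->getBeliefNumberOfChoices(beliefId);
        for (uint64_t action = 0; action < numActions; ++action) {
            for (auto const& successor : beliefManager->expandAndTriangulate(beliefId, action, resolutions)) {
                explorer.addTransitionToBelief(action, successor.first, successor.second, false);
            }
        }
    }
}
explorer.finishExploration();
explorer.computeValuesOfExploredMdp(storm::solver::OptimizationDirection::Maximize);
auto boundAtInit = explorer.getComputedValueAtInitialState();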

src/storm-pomdp/builder/BeliefMdpExplorer.h (354 changed lines)

@@ -0,0 +1,354 @@
#pragma once
#include <memory>
#include <vector>
#include <deque>
#include <map>
#include <boost/optional.hpp>
#include "storm/api/properties.h"
#include "storm/api/verification.h"
#include "storm/storage/BitVector.h"
#include "storm/utility/macros.h"
#include "storm-pomdp/storage/BeliefManager.h"
#include "storm/utility/SignalHandler.h"
namespace storm {
namespace builder {
template<typename PomdpType>
class BeliefMdpExplorer {
public:
typedef typename PomdpType::ValueType ValueType;
typedef storm::storage::BeliefManager<PomdpType> BeliefManagerType;
typedef typename BeliefManagerType::BeliefId BeliefId;
typedef uint64_t MdpStateType;
BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) {
// Intentionally left empty
}
void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
// Reset data from potential previous explorations
mdpStateToBeliefIdMap.clear();
beliefIdToMdpStateMap.clear();
beliefIdsWithMdpState.clear();
beliefIdsWithMdpState.grow(beliefManager->getNumberOfBeliefIds(), false);
lowerValueBounds.clear();
upperValueBounds.clear();
values.clear();
mdpTransitionsBuilder = storm::storage::SparseMatrixBuilder<ValueType>(0, 0, 0, true, true);
currentRowCount = 0;
startOfCurrentRowGroup = 0;
mdpActionRewards.clear();
exploredMdp = nullptr;
// Add some states with special treatment (if requested)
if (extraBottomStateValue) {
extraBottomState = getCurrentNumberOfMdpStates();
mdpStateToBeliefIdMap.push_back(beliefManager->noId());
insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get());
startOfCurrentRowGroup = currentRowCount;
mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
mdpTransitionsBuilder.addNextValue(currentRowCount, extraBottomState.get(), storm::utility::one<ValueType>());
++currentRowCount;
} else {
extraBottomState = boost::none;
}
if (extraTargetStateValue) {
extraTargetState = getCurrentNumberOfMdpStates();
mdpStateToBeliefIdMap.push_back(beliefManager->noId());
insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get());
startOfCurrentRowGroup = currentRowCount;
mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
mdpTransitionsBuilder.addNextValue(currentRowCount, extraTargetState.get(), storm::utility::one<ValueType>());
++currentRowCount;
targetStates.grow(getCurrentNumberOfMdpStates(), false);
targetStates.set(extraTargetState.get(), true);
} else {
extraTargetState = boost::none;
}
// Set up the initial state.
initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
}
bool hasUnexploredState() const {
return !beliefIdsToExplore.empty();
}
BeliefId exploreNextState() {
// Set up the matrix builder
finishCurrentRow();
startOfCurrentRowGroup = currentRowCount;
mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
++currentRowCount;
// Pop from the queue.
auto result = beliefIdsToExplore.front();
beliefIdsToExplore.pop_front();
return result;
}
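// Intended call sequence per state (mirroring the usage in ApproximatePOMDPModelchecker below):
// exploreNextState(), then addTransitionToBelief / addTransitionsToExtraStates / addSelfloopTransition
// for each local action, plus optionally setCurrentStateIsTarget() / setCurrentStateIsTruncated(),
// before the next exploreNextState() or finishExploration() closes the current row (group).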
void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero<ValueType>(), ValueType const& bottomStateValue = storm::utility::zero<ValueType>()) {
// We first insert the entries of the current row in a separate map.
// This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
uint64_t row = startOfCurrentRowGroup + localActionIndex;
if (!storm::utility::isZero(bottomStateValue)) {
STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none.");
internalAddTransition(row, extraBottomState.get(), bottomStateValue);
}
if (!storm::utility::isZero(targetStateValue)) {
STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none.");
internalAddTransition(row, extraTargetState.get(), targetStateValue);
}
}
void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one<ValueType>()) {
uint64_t row = startOfCurrentRowGroup + localActionIndex;
internalAddTransition(row, getCurrentMdpState(), value);
}
/*!
* Adds the next transition to the given successor belief
* @param localActionIndex
* @param transitionTarget
* @param value
* @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition.
* @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true.
*/
bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) {
// We first insert the entries of the current row in a separate map.
// This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
MdpStateType column;
if (ignoreNewBeliefs) {
column = getMdpState(transitionTarget);
if (column == noState()) {
return false;
}
} else {
column = getOrAddMdpState(transitionTarget);
}
uint64_t row = startOfCurrentRowGroup + localActionIndex;
internalAddTransition(row, column, value);
return true;
}
void computeRewardAtCurrentState(uint64_t const& localActionIndex, ValueType extraReward = storm::utility::zero<ValueType>()) {
if (currentRowCount >= mdpActionRewards.size()) {
mdpActionRewards.resize(currentRowCount, storm::utility::zero<ValueType>());
}
uint64_t row = startOfCurrentRowGroup + localActionIndex;
mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward;
}
void setCurrentStateIsTarget() {
targetStates.grow(getCurrentNumberOfMdpStates(), false);
targetStates.set(getCurrentMdpState(), true);
}
void setCurrentStateIsTruncated() {
truncatedStates.grow(getCurrentNumberOfMdpStates(), false);
truncatedStates.set(getCurrentMdpState(), true);
}
void finishExploration() {
// Create the transition matrix
finishCurrentRow();
auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates());
// Create a standard labeling
storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates());
mdpLabeling.addLabel("init");
mdpLabeling.addLabelToState("init", initialMdpState);
targetStates.resize(getCurrentNumberOfMdpStates(), false);
mdpLabeling.addLabel("target", std::move(targetStates));
truncatedStates.resize(getCurrentNumberOfMdpStates(), false);
mdpLabeling.addLabel("truncated", std::move(truncatedStates));
// Create a standard reward model (if rewards are available)
std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels;
if (!mdpActionRewards.empty()) {
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards)));
}
storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels));
exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
}
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const {
STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet.");
return exploredMdp;
}
MdpStateType getCurrentNumberOfMdpStates() const {
return mdpStateToBeliefIdMap.size();
}
MdpStateType getCurrentNumberOfMdpChoices() const {
return currentRowCount;
}
ValueType getLowerValueBoundAtCurrentState() const {
return lowerValueBounds[getCurrentMdpState()];
}
ValueType getUpperValueBoundAtCurrentState() const {
return upperValueBounds[getCurrentMdpState()];
}
ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const {
return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds);
}
ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const {
return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
}
std::vector<ValueType> const& computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) {
STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored");
auto property = createStandardProperty(dir, exploredMdp->hasRewardModel());
auto task = createStandardCheckTask(property);
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(exploredMdp, task));
if (res) {
values = std::move(res->asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
} else {
STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!");
STORM_LOG_ERROR("No result obtained while checking.");
}
return values;
}
ValueType const& getComputedValueAtInitialState() const {
STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored.");
return values[exploredMdp->getInitialStates().getNextSetIndex(0)];
}
private:
MdpStateType noState() const {
return std::numeric_limits<MdpStateType>::max();
}
std::shared_ptr<storm::logic::Formula const> createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) {
std::string propertyString = computeRewards ? "R" : "P";
propertyString += storm::solver::minimize(dir) ? "min" : "max";
propertyString += "=? [F \"target\"]";
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
return storm::api::extractFormulasFromProperties(propertyVector).front();
}
storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property) {
// Note: The property must not go out of scope as long as the task is in use, because the task only stores the property by reference.
// Therefore, this method takes the property by reference (and not by const reference).
auto task = storm::api::createTask<ValueType>(property, false);
auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
hint.setResultHint(values);
auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
task.setHint(hintPtr);
return task;
}
MdpStateType getCurrentMdpState() const {
return mdpTransitionsBuilder.getCurrentRowGroupCount() - 1;
}
MdpStateType getCurrentBeliefId() const {
return mdpStateToBeliefIdMap[getCurrentMdpState()];
}
void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) {
// We first insert the entries of the current row in a separate map.
// This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
STORM_LOG_ASSERT(row >= currentRowCount - 1, "Trying to insert in an already completed row.");
if (row >= currentRowCount) {
// We are going to start a new row, so insert the entries of the old one
finishCurrentRow();
currentRowCount = row + 1;
}
STORM_LOG_ASSERT(mdpTransitionsBuilderCurrentRowEntries.count(column) == 0, "Trying to insert multiple transitions to the same state.");
mdpTransitionsBuilderCurrentRowEntries[column] = value;
}
void finishCurrentRow() {
for (auto const& entry : mdpTransitionsBuilderCurrentRowEntries) {
mdpTransitionsBuilder.addNextValue(currentRowCount - 1, entry.first, entry.second);
}
mdpTransitionsBuilderCurrentRowEntries.clear();
}
MdpStateType getMdpState(BeliefId const& beliefId) const {
if (beliefId < beliefIdsWithMdpState.size() && beliefIdsWithMdpState.get(beliefId)) {
return beliefIdToMdpStateMap.at(beliefId);
} else {
return noState();
}
}
void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) {
lowerValueBounds.push_back(lowerBound);
upperValueBounds.push_back(upperBound);
// Take the middle value as a hint
values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType, uint64_t>(2));
STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have a different size than the number of available states.");
STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size.");
}
MdpStateType getOrAddMdpState(BeliefId const& beliefId) {
beliefIdsWithMdpState.grow(beliefId + 1, false);
if (beliefIdsWithMdpState.get(beliefId)) {
return beliefIdToMdpStateMap[beliefId];
} else {
// Add a new MDP state
beliefIdsWithMdpState.set(beliefId, true);
MdpStateType result = getCurrentNumberOfMdpStates();
assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size());
mdpStateToBeliefIdMap.push_back(beliefId);
beliefIdToMdpStateMap[beliefId] = result;
// This new belief needs exploration
beliefIdsToExplore.push_back(beliefId);
insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId));
return result;
}
}
// Belief state related information
std::shared_ptr<BeliefManagerType> beliefManager;
std::vector<BeliefId> mdpStateToBeliefIdMap;
std::map<BeliefId, MdpStateType> beliefIdToMdpStateMap;
storm::storage::BitVector beliefIdsWithMdpState;
// Exploration information
std::deque<uint64_t> beliefIdsToExplore;
storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder;
std::map<MdpStateType, ValueType> mdpTransitionsBuilderCurrentRowEntries;
std::vector<ValueType> mdpActionRewards;
uint64_t startOfCurrentRowGroup;
uint64_t currentRowCount;
// Special states during exploration
boost::optional<MdpStateType> extraTargetState;
boost::optional<MdpStateType> extraBottomState;
storm::storage::BitVector targetStates;
storm::storage::BitVector truncatedStates;
MdpStateType initialMdpState;
// Final Mdp
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
// Value related information
std::vector<ValueType> const& pomdpLowerValueBounds;
std::vector<ValueType> const& pomdpUpperValueBounds;
std::vector<ValueType> lowerValueBounds;
std::vector<ValueType> upperValueBounds;
std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has been performed
};
}
}
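
A note on internalAddTransition/finishCurrentRow above: entries of the current row are buffered in a std::map so that they reach the SparseMatrixBuilder with ascending column indices, which its addNextValue interface expects. A self-contained sketch of that pattern (standalone illustration, not Storm code):

#include <cassert>
#include <cstdint>
#include <iostream>
#include <map>

// Stand-in for a builder that requires entries of a row in ascending column order.
void addNextValueInOrder(uint64_t column, double value) {
    std::cout << column << " -> " << value << "\n";
}

int main() {
    std::map<uint64_t, double> currentRowEntries;  // std::map iterates keys in ascending order
    // Transitions may be discovered in arbitrary column order...
    currentRowEntries.emplace(7, 0.25);
    assert(currentRowEntries.count(2) == 0);       // no duplicate transitions to the same state
    currentRowEntries.emplace(2, 0.75);
    // ...but are flushed sorted, as the matrix builder expects.
    for (auto const& entry : currentRowEntries) {
        addNextValueInOrder(entry.first, entry.second);
    }
}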

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (248 changed lines)

@@ -22,7 +22,8 @@
#include "storm/api/properties.h"
#include "storm/api/export.h"
#include "storm-parsers/api/storm-parsers.h"
+#include "storm-pomdp/builder/BeliefMdpExplorer.h"
+#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"
#include "storm/utility/macros.h"
#include "storm/utility/SignalHandler.h"
@@ -57,6 +58,10 @@ namespace storm {
std::unique_ptr<POMDPCheckResult<ValueType>> result;
// Extract the relevant information from the formula
auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula);
+// Compute some initial bounds on the values for each state of the pomdp
+auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
if (formulaInfo.isNonNestedReachabilityProbability()) {
// FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
@@ -68,7 +73,7 @@ namespace storm {
if (options.doRefinement) {
result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
} else {
-result = computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
}
} else if (formulaInfo.isNonNestedExpectedRewardFormula()) {
// FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
@@ -78,7 +83,7 @@ namespace storm {
} else {
// FIXME: pick the non-unique reward model here
STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet.");
-result = computeReachabilityRewardOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
}
} else {
STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
@@ -233,8 +238,8 @@ namespace storm {
uint64_t refinementCounter = 1;
STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl)
std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
-initialOverApproxMap,
-initialUnderApproxMap, underApproxModelSize);
+{},
+{}, underApproxModelSize);
if (res == nullptr) {
statistics.refinementSteps = 0;
return nullptr;
@@ -335,14 +340,14 @@ namespace storm {
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
-std::vector<uint64_t> &observationResolutionVector,
bool computeRewards,
-boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
+std::vector<ValueType> const& lowerPomdpValueBounds,
+std::vector<ValueType> const& upperPomdpValueBounds,
uint64_t maxUaModelSize) {
STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
-auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, overApproximationMap,
-underApproximationMap, maxUaModelSize);
+std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
+auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, lowerPomdpValueBounds,
+upperPomdpValueBounds, maxUaModelSize);
if (result == nullptr) {
return nullptr;
}
@@ -353,8 +358,6 @@ namespace storm {
}
}
template <typename ValueType, typename BeliefType, typename SummandsType>
ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
ValueType result = storm::utility::zero<ValueType>();
@@ -369,155 +372,64 @@ namespace storm {
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
std::vector<uint64_t> &observationResolutionVector,
bool computeRewards,
-boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
+std::vector<ValueType> const& lowerPomdpValueBounds,
+std::vector<ValueType> const& upperPomdpValueBounds,
uint64_t maxUaModelSize) {
-bool boundMapsSet = overApproximationMap && underApproximationMap;
-std::map<uint64_t, ValueType> overMap;
-std::map<uint64_t, ValueType> underMap;
-if (boundMapsSet) {
-overMap = overApproximationMap.value();
-underMap = underApproximationMap.value();
-}
auto beliefManager = std::make_shared<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>>(pomdp, options.numericPrecision);
if (computeRewards) {
beliefManager->setRewardModel(); // TODO: get actual name
}
-bsmap_type beliefStateMap;
-std::deque<uint64_t> beliefsToBeExpanded;
statistics.overApproximationBuildTime.start();
-// Initial belief always has belief ID 0
-auto initialObservation = beliefManager->getBeliefObservation(beliefManager->getInitialBelief());
-// These are the components to build the MDP from the grid
-// Reserve states 0 and 1 as always sink/goal states
-storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
-uint64_t extraBottomState = 0;
-uint64_t extraTargetState = computeRewards ? 0 : 1;
-uint64_t nextMdpStateId = extraTargetState + 1;
-uint64_t mdpMatrixRow = 0;
-for (uint64_t state = 0; state < nextMdpStateId; ++state) {
-mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
-++mdpMatrixRow;
-}
-// Hint vector for the MDP modelchecker (initialize with constant sink/goal values)
-std::vector<ValueType> hintVector(nextMdpStateId, storm::utility::zero<ValueType>());
-if (!computeRewards) {
-hintVector[extraTargetState] = storm::utility::one<ValueType>();
-}
-std::vector<uint64_t> targetStates = {extraTargetState};
-storm::storage::BitVector fullyExpandedStates;
-// Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace
-std::map<uint64_t, ValueType> weightedSumOverMap;
-std::map<uint64_t, ValueType> weightedSumUnderMap;
-// for the initial belief, add the triangulated initial states
-auto triangulation = beliefManager->triangulateBelief(beliefManager->getInitialBelief(), observationResolutionVector[initialObservation]);
-uint64_t initialMdpState = nextMdpStateId;
-++nextMdpStateId;
-if (triangulation.size() == 1) {
-// The initial belief is on the grid itself
-auto initBeliefId = triangulation.gridPoints.front();
-if (boundMapsSet) {
-auto const& gridPoint = beliefManager->getBelief(initBeliefId);
-weightedSumOverMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, overMap);
-weightedSumUnderMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, underMap);
-}
-beliefsToBeExpanded.push_back(initBeliefId);
-beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState));
-hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one<ValueType>()
-: storm::utility::zero<ValueType>());
+storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+if (computeRewards) {
+explorer.startNewExploration(storm::utility::zero<ValueType>());
} else {
-// If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
-mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-for (uint64_t i = 0; i < triangulation.size(); ++i) {
-beliefsToBeExpanded.push_back(triangulation.gridPoints[i]);
-mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]);
-beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId));
-++nextMdpStateId;
-if (boundMapsSet) {
-auto const& gridPoint = beliefManager->getBelief(triangulation.gridPoints[i]);
-weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, overMap);
-weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, underMap);
-}
-hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one<ValueType>()
-: storm::utility::zero<ValueType>());
-}
-//beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
-++mdpMatrixRow;
+explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
}
// Expand the beliefs to generate the grid on-the-fly
if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
}
-storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false);
-for (auto const& belId : beliefsToBeExpanded) {
-foundBeliefs.set(belId, true);
-}
-while (!beliefsToBeExpanded.empty()) {
-uint64_t currId = beliefsToBeExpanded.front();
-beliefsToBeExpanded.pop_front();
+while (explorer.hasUnexploredState()) {
+uint64_t currId = explorer.exploreNextState();
-uint64_t currMdpState = beliefStateMap.left.at(currId);
uint32_t currObservation = beliefManager->getBeliefObservation(currId);
-mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
if (targetObservations.count(currObservation) != 0) {
-// Make this state absorbing
-targetStates.push_back(currMdpState);
-mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
-++mdpMatrixRow;
-} else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
-// TODO: with rewards we would have to assign the corresponding reward to this transition
-mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]);
-mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one<ValueType>() - weightedSumOverMap[currId]);
-++mdpMatrixRow;
+explorer.setCurrentStateIsTarget();
+explorer.addSelfloopTransition();
} else {
-fullyExpandedStates.grow(nextMdpStateId, false);
-fullyExpandedStates.set(currMdpState, true);
-uint64_t someState = beliefManager->getBelief(currId).begin()->first;
-uint64_t numChoices = pomdp.getNumberOfChoices(someState);
-for (uint64_t action = 0; action < numChoices; ++action) {
+bool stopExploration = false;
+if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
+stopExploration = true;
+explorer.setCurrentStateIsTruncated();
+}
+for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
+ValueType truncationProbability = storm::utility::zero<ValueType>();
+ValueType truncationValueBound = storm::utility::zero<ValueType>();
auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
-// Check for newly found grid points
-foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false);
for (auto const& successor : successorGridPoints) {
-auto successorId = successor.first;
-auto const& successorBelief = beliefManager->getBelief(successorId);
-auto successorObservation = beliefManager->getBeliefObservation(successorBelief);
-if (!foundBeliefs.get(successorId)) {
-foundBeliefs.set(successorId);
-beliefsToBeExpanded.push_back(successorId);
-beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
-++nextMdpStateId;
-if (boundMapsSet) {
-ValueType upperBound = getWeightedSum<ValueType>(successorBelief, overMap);
-ValueType lowerBound = getWeightedSum<ValueType>(successorBelief, underMap);
-if (cc.isEqual(upperBound, lowerBound)) {
-hintVector.push_back(lowerBound);
-} else {
-hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
-}
-weightedSumOverMap[successorId] = upperBound;
-weightedSumUnderMap[successorId] = lowerBound;
-} else {
-hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
-}
+bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
+if (!added) {
+STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
+// We did not explore this successor state. Get a bound on the "missing" value
+truncationProbability += successor.second;
+truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first));
}
-auto successorMdpState = beliefStateMap.left.at(successorId);
-// This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted.
-mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
}
-++mdpMatrixRow;
+if (stopExploration) {
+if (computeRewards) {
+explorer.addTransitionsToExtraStates(action, truncationProbability);
+} else {
+explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
+}
+}
+if (computeRewards) {
+// The truncationValueBound will be added on top of the reward introduced by the current belief state.
+explorer.computeRewardAtCurrentState(action, truncationValueBound);
+}
}
}
if (storm::utility::resources::isTerminate()) {
@@ -525,64 +437,30 @@ namespace storm {
break;
}
}
-statistics.overApproximationStates = nextMdpStateId;
-STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
+statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates();
if (storm::utility::resources::isTerminate()) {
statistics.overApproximationBuildTime.stop();
return nullptr;
}
-fullyExpandedStates.resize(nextMdpStateId, false);
-storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
-mdpLabeling.addLabel("init");
-mdpLabeling.addLabel("target");
-mdpLabeling.addLabelToState("init", initialMdpState);
-for (auto targetState : targetStates) {
-mdpLabeling.addLabelToState("target", targetState);
-}
-storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
-auto overApproxMdp = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
-if (computeRewards) {
-storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
-for (auto const &iter : beliefStateMap.left) {
-if (fullyExpandedStates.get(iter.second)) {
-auto const& currentBelief = beliefManager->getBelief(iter.first);
-auto representativeState = currentBelief.begin()->first;
-for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
-uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
-}
-}
-}
-overApproxMdp->addRewardModel("default", mdpRewardModel);
-}
+explorer.finishExploration();
statistics.overApproximationBuildTime.stop();
STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
-overApproxMdp->printModelInformationToStream(std::cout);
-auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(overApproxMdp);
-auto property = createStandardProperty(min, computeRewards);
-auto task = createStandardCheckTask(property, std::move(hintVector));
+explorer.getExploredMdp()->printModelInformationToStream(std::cout);
statistics.overApproximationCheckTime.start();
-std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(overApproxMdp, task));
+explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
statistics.overApproximationCheckTime.stop();
-if (storm::utility::resources::isTerminate() && !res) {
-return nullptr;
-}
-STORM_LOG_ASSERT(res, "Result does not exist.");
-res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true)));
-auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-auto overApprox = overApproxResultMap[initialMdpState];
STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl);
-STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
+STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
//auto underApprox = weightedSumUnderMap[initialBelief.id];
auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize);
if (storm::utility::resources::isTerminate() && !underApproxComponents) {
// TODO: return other components needed for refinement.
//return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
-return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
+//return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
}
STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
@@ -592,8 +470,8 @@ namespace storm {
underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
*/
-return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
-underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
+return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {},
+underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
}
@@ -930,14 +808,14 @@ namespace storm {
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min) {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
+// return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min) {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
+// return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
}
@@ -1191,7 +1069,7 @@ namespace storm {
auto representativeState = currentBelief.begin()->first;
for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
+mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(iter.first, action));
}
}
}
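
The core of the rewritten exploration loop is the truncation scheme: once the gap between the upper and lower value bound at the current belief drops below options.explorationThreshold, unexplored successors are not added to the MDP; their probability mass is instead rerouted to the extra target/bottom states so that the computed value remains a valid bound. Condensed from the fragments above into one piece (a sketch for the probability case; explorer, action, min, and successorGridPoints as in that loop):

// Sketch, condensed from computeFirstRefinementStep above (probability case).
ValueType truncationProbability = storm::utility::zero<ValueType>();
ValueType truncationValueBound = storm::utility::zero<ValueType>();
for (auto const& successor : successorGridPoints) {
    // With ignoreNewBeliefs == true, unknown successor beliefs are skipped and 'added' is false.
    bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, true);
    if (!added) {
        truncationProbability += successor.second;
        // Use the bound that is safe for the optimization direction.
        truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first)
                                                        : explorer.computeUpperValueBoundAtBelief(successor.first));
    }
}
// Route value-bound mass to the extra target state and the remainder to the bottom state.
explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);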

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (9 changed lines)

@@ -117,8 +117,7 @@ namespace storm {
*/
std::shared_ptr<RefinementComponents<ValueType>>
computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
-bool computeRewards, boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
+bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
std::shared_ptr<RefinementComponents<ValueType>>
computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
@@ -140,10 +139,8 @@ namespace storm {
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/
std::unique_ptr<POMDPCheckResult<ValueType>>
-computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
-std::vector<uint64_t> &observationResolutionVector, bool computeRewards,
-boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
+computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards,
+std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
/**
* Helper to compute an underapproximation of the reachability property.

src/storm-pomdp/storage/BeliefManager.h (24 changed lines)

@@ -45,6 +45,8 @@ namespace storm {
};
BeliefType const& getBelief(BeliefId const& id) const {
+STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existent belief.");
+STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range.");
return beliefs[id];
}
@@ -54,6 +56,10 @@ namespace storm {
return idIt->second;
}
+BeliefId noId() const {
+return std::numeric_limits<BeliefId>::max();
+}
std::string toString(BeliefType const& belief) const {
std::stringstream str;
str << "{ ";
@@ -180,11 +186,22 @@ namespace storm {
return true;
}
+template <typename SummandsType>
+ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) {
+ValueType result = storm::utility::zero<ValueType>();
+for (auto const& entry : getBelief(beliefId)) {
+result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
+}
+return result;
+}
BeliefId const& getInitialBelief() const {
return initialBeliefId;
}
-ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const {
+ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const {
+auto const& belief = getBelief(beliefId);
STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified.");
auto result = storm::utility::zero<ValueType>();
auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
@@ -206,6 +223,11 @@ namespace storm {
return getBeliefObservation(getBelief(beliefId));
}
+uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
+auto belief = getBelief(beliefId);
+return pomdp.getNumberOfChoices(belief.begin()->first);
+}
Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
//TODO this can also be simplified using the sparse vector interpretation
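
The new getWeightedSum helper is what the explorer's computeLowerValueBoundAtBelief / computeUpperValueBoundAtBelief delegate to: it evaluates a per-state value vector at a belief. A short usage sketch (beliefManager and pomdpUpperValueBounds are assumed to be set up by the caller, as in computeFirstRefinementStep):

// Sketch: evaluate a per-POMDP-state bound vector at a belief.
auto beliefId = beliefManager->getInitialBelief();
ValueType upperBoundAtInitialBelief = beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);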
