
POMDP: Started to split belief logic from exploration logic.

Branch: main
Tim Quatmann, 5 years ago
parent commit 6fee61feb1
3 changed files:
  1. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (343 changes)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (3 changes)
  3. src/storm-pomdp/storage/BeliefGrid.h (296 changes)

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (343 changes)

@@ -1,5 +1,7 @@
#include "ApproximatePOMDPModelchecker.h"
#include <tuple>
#include <boost/algorithm/string.hpp>
#include "storm-pomdp/analysis/FormulaInformation.h"
@@ -21,6 +23,8 @@
#include "storm/api/export.h"
#include "storm-parsers/api/storm-parsers.h"
#include "storm-pomdp/storage/BeliefGrid.h"
#include "storm/utility/macros.h"
#include "storm/utility/SignalHandler.h"
#include "storm/exceptions/NotSupportedException.h"
@@ -322,6 +326,16 @@ namespace storm {
}
template <typename ValueType, typename BeliefType, typename SummandsType>
ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
ValueType result = storm::utility::zero<ValueType>();
for (auto const& entry : belief) {
result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
}
return result;
}
template<typename ValueType, typename RewardModelType>
std::shared_ptr<RefinementComponents<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
@@ -338,15 +352,14 @@
underMap = underApproximationMap.value();
}
std::vector<storm::pomdp::Belief<ValueType>> beliefList;
std::vector<bool> beliefIsTarget;
std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>, ValueType> newBeliefGrid(pomdp, options.numericPrecision);
//Use caching to avoid multiple computation of the subsimplices and lambdas
std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> subSimplexCache;
std::map<uint64_t, std::vector<ValueType>> lambdaCache;
bsmap_type beliefStateMap;
std::deque<uint64_t> beliefsToBeExpanded;
storm::storage::BitVector expandedBeliefs;
// current ID -> action -> reward
std::map<uint64_t, std::vector<ValueType>> beliefActionRewards;
@@ -355,244 +368,185 @@
// Initial belief always has belief ID 0
storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(nextId);
++nextId;
beliefList.push_back(initialBelief);
beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
// These are the components to build the MDP from the grid
// Reserve states 0 and 1 as always sink/goal states
std::vector<std::vector<std::map<uint64_t, ValueType>>> mdpTransitions = {{{{0, storm::utility::one<ValueType>()}}},
{{{1, storm::utility::one<ValueType>()}}}};
storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
uint64_t extraBottomState = 0;
uint64_t extraTargetState = computeRewards ? 0 : 1;
uint64_t nextMdpStateId = extraTargetState + 1;
uint64_t mdpMatrixRow = 0;
for (uint64_t state = 0; state < nextMdpStateId; ++state) {
mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
++mdpMatrixRow;
}
// Hint vector for the MDP modelchecker (initialize with constant sink/goal values)
std::vector<ValueType> hintVector = {storm::utility::zero<ValueType>(), storm::utility::one<ValueType>()};
std::vector<uint64_t> targetStates = {1};
uint64_t mdpStateId = 2;
beliefStateMap.insert(bsmap_type::value_type(initialBelief.id, mdpStateId));
++mdpStateId;
std::vector<ValueType> hintVector(nextMdpStateId, storm::utility::zero<ValueType>());
if (!computeRewards) {
hintVector[extraTargetState] = storm::utility::one<ValueType>();
}
std::vector<uint64_t> targetStates = {extraTargetState};
// Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace
std::map<uint64_t, ValueType> weightedSumOverMap;
std::map<uint64_t, ValueType> weightedSumUnderMap;
// for the initial belief, add the triangulated initial states
auto initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, observationResolutionVector[initialBelief.observation], pomdp.getNumberOfStates());
auto initSubSimplex = initTemp.first;
auto initLambdas = initTemp.second;
auto triangulation = newBeliefGrid.triangulateBelief(initialBelief.probabilities, observationResolutionVector[initialBelief.observation]);
if (options.cacheSubsimplices) {
subSimplexCache[0] = initSubSimplex;
lambdaCache[0] = initLambdas;
//subSimplexCache[0] = initSubSimplex;
//lambdaCache[0] = initLambdas;
}
std::vector<std::map<uint64_t, ValueType>> initTransitionsInBelief;
std::map<uint64_t, ValueType> initTransitionInActionBelief;
bool initInserted = false;
for (size_t j = 0; j < initLambdas.size(); ++j) {
if (!cc.isEqual(initLambdas[j], storm::utility::zero<ValueType>())) {
uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]);
if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) {
if (searchResult == 0) {
// the initial belief is on the grid itself
if (boundMapsSet) {
auto tempWeightedSumOver = storm::utility::zero<ValueType>();
auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) {
tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
}
weightedSumOverMap[initialBelief.id] = tempWeightedSumOver;
weightedSumUnderMap[initialBelief.id] = tempWeightedSumUnder;
}
initInserted = true;
beliefGrid.push_back(initialBelief);
beliefsToBeExpanded.push_back(0);
hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
: storm::utility::zero<ValueType>());
} else {
// if the triangulated belief was not found in the list, we place it in the grid and add it to the work list
if (boundMapsSet) {
auto tempWeightedSumOver = storm::utility::zero<ValueType>();
auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) {
tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
}
weightedSumOverMap[nextId] = tempWeightedSumOver;
weightedSumUnderMap[nextId] = tempWeightedSumUnder;
}
storm::pomdp::Belief<ValueType> gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]};
beliefList.push_back(gridBelief);
beliefGrid.push_back(gridBelief);
beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
beliefsToBeExpanded.push_back(nextId);
++nextId;
hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
uint64_t initialMdpState = nextMdpStateId;
++nextMdpStateId;
if (triangulation.size() == 1) {
// The initial belief is on the grid itself
auto initBeliefId = triangulation.gridPoints.front();
if (boundMapsSet) {
auto const& gridPoint = newBeliefGrid.getGridPoint(initBeliefId);
weightedSumOverMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, overMap);
weightedSumUnderMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, underMap);
}
beliefsToBeExpanded.push_back(initBeliefId);
beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState));
hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
: storm::utility::zero<ValueType>());
beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId));
initTransitionInActionBelief[mdpStateId] = initLambdas[j];
++nextId;
++mdpStateId;
}
} else {
// If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
for (uint64_t i = 0; i < triangulation.size(); ++i) {
beliefsToBeExpanded.push_back(triangulation.gridPoints[i]);
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]);
beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId));
++nextMdpStateId;
if (boundMapsSet) {
auto const& gridPoint = newBeliefGrid.getGridPoint(triangulation.gridPoints[i]);
weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, overMap);
weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, underMap);
}
hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
: storm::utility::zero<ValueType>());
}
//beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
++mdpMatrixRow;
}
// If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
if (!initTransitionInActionBelief.empty()) {
initTransitionsInBelief.push_back(initTransitionInActionBelief);
mdpTransitions.push_back(initTransitionsInBelief);
}
//beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
// Expand the beliefs to generate the grid on-the-fly
if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
}
expandedBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false);
while (!beliefsToBeExpanded.empty()) {
uint64_t currId = beliefsToBeExpanded.front();
beliefsToBeExpanded.pop_front();
bool isTarget = beliefIsTarget[currId];
if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
// TODO: with rewards we would have to assign the corresponding reward to this transition
mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one<ValueType>() - weightedSumOverMap[currId]}}});
continue;
}
if (isTarget) {
// Depending on whether we compute rewards, we select the right initial result
// MDP stuff
targetStates.push_back(beliefStateMap.left.at(currId));
mdpTransitions.push_back({{{beliefStateMap.left.at(currId), storm::utility::one<ValueType>()}}});
beliefsToBeExpanded.pop_front();
expandedBeliefs.set(currId, true); // Do not expand this belief again.
assert(currId < expandedBeliefs.size());
uint64_t currMdpState = beliefStateMap.left.at(currId);
auto const& currBelief = newBeliefGrid.getGridPoint(currId);
uint32_t currObservation = pomdp.getObservation(currBelief.begin()->first);
mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
if (targetObservations.count(currObservation) != 0) {
// Make this state absorbing
targetStates.push_back(currMdpState);
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
++mdpMatrixRow;
} else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
// TODO: with rewards we would have to assign the corresponding reward to this transition
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]);
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one<ValueType>() - weightedSumOverMap[currId]);
++mdpMatrixRow;
} else { } else {
uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front();
uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
auto const& currBelief = newBeliefGrid.getGridPoint(currId);
uint64_t someState = currBelief.begin()->first;
uint64_t numChoices = pomdp.getNumberOfChoices(someState);
std::vector<ValueType> actionRewardsInState(numChoices);
std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
for (uint64_t action = 0; action < numChoices; ++action) {
std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currId], action);
std::map<uint64_t, ValueType> transitionInActionBelief;
for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
uint32_t observation = iter->first;
// THIS CALL IS SLOW
// TODO speed this up
uint64_t idNextBelief = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currId], action,
observation, nextId);
nextId = beliefList.size();
//Triangulate here and put the possibly resulting belief in the grid
std::vector<std::map<uint64_t, ValueType>> subSimplex;
std::vector<ValueType> lambdas;
if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) {
subSimplex = subSimplexCache[idNextBelief];
lambdas = lambdaCache[idNextBelief];
} else {
auto temp = computeSubSimplexAndLambdas(beliefList[idNextBelief].probabilities,
observationResolutionVector[beliefList[idNextBelief].observation], pomdp.getNumberOfStates());
subSimplex = temp.first;
lambdas = temp.second;
if (options.cacheSubsimplices) {
subSimplexCache[idNextBelief] = subSimplex;
lambdaCache[idNextBelief] = lambdas;
}
}
for (size_t j = 0; j < lambdas.size(); ++j) {
if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
auto approxId = getBeliefIdInVector(beliefGrid, observation, subSimplex[j]);
if (approxId == uint64_t(-1)) {
// if the triangulated belief was not found in the list, we place it in the grid and add it to the work list
storm::pomdp::Belief<ValueType> gridBelief = {nextId, observation, subSimplex[j]};
beliefList.push_back(gridBelief);
beliefGrid.push_back(gridBelief);
beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
// compute overapproximate value using MDP result map
if (boundMapsSet) {
auto tempWeightedSumOver = storm::utility::zero<ValueType>();
auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
for (uint64_t i = 0; i < subSimplex[j].size(); ++i) {
tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
}
if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) {
hintVector.push_back(tempWeightedSumOver);
} else {
hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? storm::utility::one<ValueType>()
: storm::utility::zero<ValueType>());
}
weightedSumOverMap[nextId] = tempWeightedSumOver;
weightedSumUnderMap[nextId] = tempWeightedSumUnder;
} else {
hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? storm::utility::one<ValueType>()
: storm::utility::zero<ValueType>());
}
beliefsToBeExpanded.push_back(nextId);
beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId));
transitionInActionBelief[mdpStateId] = iter->second * lambdas[j];
++nextId;
++mdpStateId;
auto successorGridPoints = newBeliefGrid.expandAction(currId, action, observationResolutionVector);
// Check for newly found grid points
expandedBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false);
for (auto const& successor : successorGridPoints) {
auto successorId = successor.first;
auto successorBelief = newBeliefGrid.getGridPoint(successorId);
auto successorObservation = pomdp.getObservation(successorBelief.begin()->first);
if (!expandedBeliefs.get(successorId)) {
beliefsToBeExpanded.push_back(successorId);
beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
++nextMdpStateId;
if (boundMapsSet) {
ValueType upperBound = getWeightedSum<ValueType>(successorBelief, overMap);
ValueType lowerBound = getWeightedSum<ValueType>(successorBelief, underMap);
if (cc.isEqual(upperBound, lowerBound)) {
hintVector.push_back(lowerBound);
} else { } else {
transitionInActionBelief[beliefStateMap.left.at(approxId)] = iter->second * lambdas[j];
hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
}
weightedSumOverMap[successorId] = upperBound;
weightedSumUnderMap[successorId] = lowerBound;
} else {
hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
}
}
auto successorMdpState = beliefStateMap.left.at(successorId);
// This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted.
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
}
if (!transitionInActionBelief.empty()) {
transitionsInBelief.push_back(transitionInActionBelief);
}
++mdpMatrixRow;
}
if (transitionsInBelief.empty()) {
std::map<uint64_t, ValueType> transitionInActionBelief;
transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one<ValueType>();
transitionsInBelief.push_back(transitionInActionBelief);
}
mdpTransitions.push_back(transitionsInBelief);
}
if (storm::utility::resources::isTerminate()) {
statistics.overApproximationBuildAborted = true;
break;
}
}
statistics.overApproximationStates = mdpTransitions.size();
STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl);
statistics.overApproximationStates = nextMdpStateId;
STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
if (storm::utility::resources::isTerminate()) { if (storm::utility::resources::isTerminate()) {
statistics.overApproximationBuildTime.stop(); statistics.overApproximationBuildTime.stop();
return nullptr; return nullptr;
} }
storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size());
storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
mdpLabeling.addLabel("init"); mdpLabeling.addLabel("init");
mdpLabeling.addLabel("target"); mdpLabeling.addLabel("target");
mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBelief.id));
mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialMdpState));
for (auto targetState : targetStates) {
mdpLabeling.addLabelToState("target", targetState);
}
storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling);
storm::models::sparse::Mdp<ValueType, RewardModelType> overApproxMdp(modelComponents);
storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
for (uint64_t row = 0; row < modelComponents.transitionMatrix.getRowCount(); ++row) {
if (!storm::utility::isOne(modelComponents.transitionMatrix.getRowSum(row))) {
std::cout << "Row " << row << " does not sum up to one. " << modelComponents.transitionMatrix.getRowSum(row) << " instead" << std::endl;
}
}
auto overApproxMdp = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
if (computeRewards) {
storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow));
for (auto const &iter : beliefStateMap.left) {
auto currentBelief = beliefList[iter.first];
auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) {
auto currentBelief = newBeliefGrid.getGridPoint(iter.first);
auto representativeState = currentBelief.begin()->first;
for (uint64_t action = 0; action < overApproxMdp->getNumberOfChoices(representativeState); ++action) {
// Add the reward
mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
currentBelief));
uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action));
mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief));
}
}
overApproxMdp.addRewardModel("std", mdpRewardModel);
overApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
overApproxMdp->addRewardModel("default", mdpRewardModel);
overApproxMdp->restrictRewardModels(std::set<std::string>({"default"}));
}
statistics.overApproximationBuildTime.stop();
STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
overApproxMdp.printModelInformationToStream(std::cout);
overApproxMdp->printModelInformationToStream(std::cout);
auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(overApproxMdp);
std::string propertyString = computeRewards ? "R" : "P";
propertyString += min ? "min" : "max";
propertyString += "=? [F \"target\"]";
@@ -604,31 +558,34 @@ namespace storm {
auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
task.setHint(hintPtr);
statistics.overApproximationCheckTime.start();
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(overApproxMdp, task));
statistics.overApproximationCheckTime.stop();
if (storm::utility::resources::isTerminate() && !res) {
return nullptr;
}
STORM_LOG_ASSERT(res, "Result does not exist.");
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true)));
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true)));
auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)];
STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl);
//auto underApprox = weightedSumUnderMap[initialBelief.id];
auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards,
maxUaModelSize);
STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
//auto underApprox = weightedSumUnderMap[initialBelief.id];
/* TODO: Enable under approx again:
auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize);
if (storm::utility::resources::isTerminate() && !underApproxComponents) {
return std::make_unique<RefinementComponents<ValueType>>(
RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
}
STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
return std::make_unique<RefinementComponents<ValueType>>(
RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
*/
return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, storm::utility::zero<ValueType>(), overApproxResultMap,
{}, {}, {}, {}, beliefStateMap, {}, initialBelief.id});
}
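The nested std::map representation of the MDP transitions is replaced above by storm's SparseMatrixBuilder. A minimal sketch of the builder protocol as used in this commit (same flags: dimensions resolved at build time, custom row grouping; one row group per MDP state, one row per action; buildTinyMatrix is illustrative only):

#include <cstdint>
#include "storm/storage/SparseMatrix.h"

storm::storage::SparseMatrix<double> buildTinyMatrix() {
    storm::storage::SparseMatrixBuilder<double> builder(0, 0, 0, true, true);
    uint64_t row = 0;
    builder.newRowGroup(row);           // state 0: a single action with a self-loop
    builder.addNextValue(row, 0, 1.0);
    ++row;
    builder.newRowGroup(row);           // state 1: two actions
    builder.addNextValue(row, 0, 0.5);  // columns must be added in ascending order per row
    builder.addNextValue(row, 1, 0.5);
    ++row;
    builder.addNextValue(row, 1, 1.0);
    ++row;
    return builder.build(row, 2, 2);    // row count, column count, row group count
}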
template<typename ValueType, typename RewardModelType>
@@ -973,7 +930,8 @@ namespace storm {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
@@ -1307,7 +1265,16 @@
}
template<typename ValueType, typename RewardModelType>
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> &belief) {
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief) {
auto result = storm::utility::zero<ValueType>();
for (auto const &probEntry : belief) {
result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
}
return result;
}
template<typename ValueType, typename RewardModelType>
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief) {
auto result = storm::utility::zero<ValueType>();
for (auto const &probEntry : belief.probabilities) {
result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
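Both overloads of getRewardAfterAction compute the same quantity: the immediate reward of an action, averaged over the states in the belief's support. A standalone numeric sketch with hypothetical rewards:

#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

int main() {
    // Belief with mass 0.4 on state 0 and 0.6 on state 3.
    std::map<uint64_t, double> belief = {{0, 0.4}, {3, 0.6}};
    // Hypothetical state-action rewards of one fixed action, per state.
    std::vector<double> actionReward = {2.0, 0.0, 0.0, 5.0};
    double result = 0.0;
    for (auto const& probEntry : belief) {
        result += probEntry.second * actionReward.at(probEntry.first);
    }
    std::cout << result << std::endl; // 0.4 * 2.0 + 0.6 * 5.0 = 3.8
    return 0;
}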

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (3 changes)

@@ -240,7 +240,8 @@
* @param belief the belief in which the action is performed
* @return the reward earned by performing the action in the belief
*/
ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> &belief);
ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief);
ValueType getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief);
struct Statistics {
Statistics();

src/storm-pomdp/storage/BeliefGrid.h (296 changes)

@@ -0,0 +1,296 @@
#pragma once
#include <map>
#include <boost/optional.hpp>
//#include <boost/container/flat_map.hpp>
#include "storm/utility/macros.h"
#include "storm/exceptions/UnexpectedException.h"
namespace storm {
namespace storage {
template <typename PomdpType, typename BeliefValueType, typename StateType = uint64_t>
class BeliefGrid {
public:
typedef typename PomdpType::ValueType ValueType;
//typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType
typedef std::map<StateType, BeliefValueType> BeliefType;
typedef uint64_t BeliefId;
BeliefGrid(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
// Intentionally left empty
}
struct Triangulation {
std::vector<BeliefId> gridPoints;
std::vector<BeliefValueType> weights;
uint64_t size() const {
return weights.size();
}
};
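A Triangulation encodes a belief as a convex combination of grid points. A hypothetical helper that makes this contract explicit (recombine is illustrative, not part of the commit; it computes exactly what assertTriangulation below verifies):

template<typename Grid>
typename Grid::BeliefType recombine(Grid const& grid, typename Grid::Triangulation const& t) {
    typename Grid::BeliefType result;
    for (uint64_t i = 0; i < t.size(); ++i) {
        for (auto const& entry : grid.getGridPoint(t.gridPoints[i])) {
            // std::map's operator[] value-initializes missing entries to zero.
            result[entry.first] += t.weights[i] * entry.second;
        }
    }
    return result; // should equal the belief the triangulation was computed from
}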
BeliefType const& getGridPoint(BeliefId const& id) const {
return gridPoints[id];
}
BeliefId getIdOfGridPoint(BeliefType const& gridPoint) const {
auto idIt = gridPointToIdMap.find(gridPoint);
STORM_LOG_THROW(idIt != gridPointToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown grid state.");
return idIt->second;
}
bool isEqual(BeliefType const& first, BeliefType const& second) const {
if (first.size() != second.size()) {
return false;
}
auto secondIt = second.begin();
for (auto const& firstEntry : first) {
if (firstEntry.first != secondIt->first) {
return false;
}
if (!cc.isEqual(firstEntry.second, secondIt->second)) {
return false;
}
++secondIt;
}
return true;
}
bool assertBelief(BeliefType const& belief) const {
BeliefValueType sum = storm::utility::zero<BeliefValueType>();
boost::optional<uint32_t> observation;
for (auto const& entry : belief) {
uint32_t entryObservation = pomdp.getObservation(entry.first);
if (observation) {
if (observation.get() != entryObservation) {
STORM_LOG_ERROR("Belief support contains different observations.");
return false;
}
} else {
observation = entryObservation;
}
if (cc.isZero(entry.second)) {
// We assume that beliefs only consider their support.
STORM_LOG_ERROR("Zero belief probability.");
return false;
}
if (cc.isLess(entry.second, storm::utility::zero<BeliefValueType>())) {
STORM_LOG_ERROR("Negative belief probability.");
return false;
}
if (cc.isLess(storm::utility::one<BeliefValueType>(), entry.second)) {
STORM_LOG_ERROR("Belief probability greater than one.");
return false;
}
sum += entry.second;
}
if (!cc.isOne(sum)) {
STORM_LOG_ERROR("Belief does not sum up to one.");
return false;
}
return true;
}
bool assertTriangulation(BeliefType const& belief, Triangulation const& triangulation) const {
if (triangulation.weights.size() != triangulation.gridPoints.size()) {
STORM_LOG_ERROR("Number of weights and points in triangulation does not match.");
return false;
}
if (triangulation.size() == 0) {
STORM_LOG_ERROR("Empty triangulation.");
return false;
}
BeliefType triangulatedBelief;
BeliefValueType weightSum = storm::utility::zero<BeliefValueType>();
for (uint64_t i = 0; i < triangulation.weights.size(); ++i) {
if (cc.isZero(triangulation.weights[i])) {
STORM_LOG_ERROR("Zero weight in triangulation.");
return false;
}
if (cc.isLess(triangulation.weights[i], storm::utility::zero<BeliefValueType>())) {
STORM_LOG_ERROR("Negative weight in triangulation.");
return false;
}
if (cc.isLess(storm::utility::one<BeliefValueType>(), triangulation.weights[i])) {
STORM_LOG_ERROR("Weight greater than one in triangulation.");
return false;
}
weightSum += triangulation.weights[i];
BeliefType const& gridPoint = getGridPoint(triangulation.gridPoints[i]);
for (auto const& pointEntry : gridPoint) {
BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero<BeliefValueType>()).first->second;
triangulatedValue += triangulation.weights[i] * pointEntry.second;
}
}
if (!cc.isOne(weightSum)) {
STORM_LOG_ERROR("Triangulation weights do not sum up to one.");
return false;
}
if (!assertBelief(triangulatedBelief)) {
STORM_LOG_ERROR("Triangulated belief is not a belief.");
return false;
}
if (!isEqual(belief, triangulatedBelief)) {
STORM_LOG_ERROR("Belief does not match triangulated belief.");
return false;
}
return true;
}
Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
//TODO this can also be simplified using the sparse vector interpretation
//TODO Enable caching for this method?
STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid.");
auto nrStates = pomdp.getNumberOfStates();
// This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
// Variable names are based on the paper
// TODO avoid reallocations for these vectors
std::vector<BeliefValueType> x(nrStates);
std::vector<BeliefValueType> v(nrStates);
std::vector<BeliefValueType> d(nrStates);
auto convResolution = storm::utility::convertNumber<BeliefValueType>(resolution);
for (size_t i = 0; i < nrStates; ++i) {
for (auto const &probEntry : belief) {
if (probEntry.first >= i) {
x[i] += convResolution * probEntry.second;
}
}
v[i] = storm::utility::floor(x[i]);
d[i] = x[i] - v[i];
}
auto p = storm::utility::vector::getSortedIndices(d);
std::vector<std::vector<BeliefValueType>> qs(nrStates, std::vector<BeliefValueType>(nrStates));
for (size_t i = 0; i < nrStates; ++i) {
if (i == 0) {
for (size_t j = 0; j < nrStates; ++j) {
qs[i][j] = v[j];
}
} else {
for (size_t j = 0; j < nrStates; ++j) {
if (j == p[i - 1]) {
qs[i][j] = qs[i - 1][j] + storm::utility::one<BeliefValueType>();
} else {
qs[i][j] = qs[i - 1][j];
}
}
}
}
Triangulation result;
// The first weight is 1-sum(other weights). We therefore process the js in reverse order
BeliefValueType firstWeight = storm::utility::one<BeliefValueType>();
for (size_t j = nrStates; j > 0;) {
--j;
// First create the weights. The weights vector will be reversed at the end.
BeliefValueType weight;
if (j == 0) {
weight = firstWeight;
} else {
weight = d[p[j - 1]] - d[p[j]];
firstWeight -= weight;
}
if (!cc.isZero(weight)) {
result.weights.push_back(weight);
BeliefType gridPoint;
auto const& qsj = qs[j];
for (size_t i = 0; i < nrStates - 1; ++i) {
BeliefValueType gridPointEntry = qsj[i] - qsj[i + 1];
if (!cc.isZero(gridPointEntry)) {
gridPoint[i] = gridPointEntry / convResolution;
}
}
if (!cc.isZero(qsj[nrStates - 1])) {
gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution;
}
result.gridPoints.push_back(getOrAddGridPointId(gridPoint));
}
}
std::reverse(result.weights.begin(), result.weights.end());
STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation.");
return result;
}
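For intuition, a standalone numeric walk-through of this triangulation (plain doubles, no storm types; everything below is local to the illustration) for the dense belief (0.3, 0.7) at resolution N = 4:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
    std::vector<double> belief = {0.3, 0.7};
    double const N = 4.0;
    std::size_t const n = belief.size();
    // x[i] = N * sum_{j >= i} belief[j]; v = integer part, d = fractional part
    std::vector<double> x(n, 0.0), v(n), d(n);
    for (std::size_t i = 0; i < n; ++i) {
        for (std::size_t j = i; j < n; ++j) { x[i] += N * belief[j]; }
        v[i] = std::floor(x[i]);
        d[i] = x[i] - v[i];
    }
    // p: indices sorted by decreasing fractional part
    std::vector<std::size_t> p(n);
    std::iota(p.begin(), p.end(), 0);
    std::sort(p.begin(), p.end(), [&d](std::size_t a, std::size_t b) { return d[a] > d[b]; });
    // q[0] = v; q[i] = q[i-1] plus a unit step in direction p[i-1]
    std::vector<std::vector<double>> q(n, v);
    for (std::size_t i = 1; i < n; ++i) {
        q[i] = q[i - 1];
        q[i][p[i - 1]] += 1.0;
    }
    // weights: w[j] = d[p[j-1]] - d[p[j]] for j > 0; w[0] = 1 - sum of the others
    std::vector<double> w(n, 0.0);
    double rest = 0.0;
    for (std::size_t j = n; j-- > 1;) { w[j] = d[p[j - 1]] - d[p[j]]; rest += w[j]; }
    w[0] = 1.0 - rest;
    for (std::size_t j = 0; j < n; ++j) {
        if (w[j] == 0.0) { continue; } // exact compare suffices here; the real code uses a ConstantsComparator
        std::cout << "weight " << w[j] << " at (";
        for (std::size_t i = 0; i < n; ++i) {
            // grid point entries: (q[j][i] - q[j][i+1]) / N, last entry q[j][n-1] / N
            double entry = (i + 1 < n ? q[j][i] - q[j][i + 1] : q[j][i]) / N;
            std::cout << entry << (i + 1 < n ? ", " : ")\n");
        }
    }
    // Prints: weight 0.2 at (0.5, 0.5) and weight 0.8 at (0.25, 0.75);
    // indeed 0.2 * (0.5, 0.5) + 0.8 * (0.25, 0.75) = (0.3, 0.7).
    return 0;
}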
template<typename DistributionType>
void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) {
auto insertionRes = distr.emplace(state, value);
if (!insertionRes.second) {
insertionRes.first->second += value;
}
}
BeliefId getNumberOfGridPointIds() const {
return gridPoints.size();
}
std::map<BeliefId, ValueType> expandAction(BeliefId const& gridPointId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
std::map<BeliefId, ValueType> destinations; // The belief ids should be ordered
BeliefType gridPoint = getGridPoint(gridPointId);
// Find the probability we go to each observation
BeliefType successorObs; // This is actually not a belief but has the same type
for (auto const& pointEntry : gridPoint) {
uint64_t state = pointEntry.first;
for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
if (!storm::utility::isZero(pomdpTransition.getValue())) {
auto obs = pomdp.getObservation(pomdpTransition.getColumn());
addToDistribution(successorObs, obs, pointEntry.second * pomdpTransition.getValue());
}
}
}
// Now for each successor observation we find and triangulate the successor belief
for (auto const& successor : successorObs) {
BeliefType successorBelief;
for (auto const& pointEntry : gridPoint) {
uint64_t state = pointEntry.first;
for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) {
ValueType prob = pointEntry.second * pomdpTransition.getValue() / successor.second;
addToDistribution(successorBelief, pomdpTransition.getColumn(), prob);
}
}
}
STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief.");
Triangulation triangulation = triangulateBelief(successorBelief, observationResolutions[successor.first]);
for (size_t j = 0; j < triangulation.size(); ++j) {
addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
}
}
return destinations;
}
private:
BeliefId getOrAddGridPointId(BeliefType const& gridPoint) {
auto insertionRes = gridPointToIdMap.emplace(gridPoint, gridPoints.size());
if (insertionRes.second) {
// There actually was an insertion, so add the new grid state
gridPoints.push_back(gridPoint);
}
// Return the id
return insertionRes.first->second;
}
PomdpType const& pomdp;
uint64_t resolution;
std::vector<BeliefType> gridPoints;
std::map<BeliefType, BeliefId> gridPointToIdMap;
storm::utility::ConstantsComparator<ValueType> cc;
};
}
}
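Taken together, the BeliefGrid interface supports a compact exploration loop. A minimal sketch of how it composes, mirroring the loop in computeFirstRefinementStep but without MDP construction, bounds, or rewards (exploreBeliefGrid and its parameters are illustrative, not part of the commit):

#include <cstdint>
#include <deque>
#include <map>
#include <vector>
#include "storm-pomdp/storage/BeliefGrid.h"
#include "storm/models/sparse/Pomdp.h"
#include "storm/storage/BitVector.h"

template<typename ValueType>
void exploreBeliefGrid(storm::models::sparse::Pomdp<ValueType> const& pomdp,
                       std::map<uint64_t, ValueType> const& initialBelief,
                       std::vector<uint64_t> const& observationResolutions,
                       ValueType const& precision) {
    storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>, ValueType> grid(pomdp, precision);
    std::deque<uint64_t> beliefsToBeExpanded;
    storm::storage::BitVector expandedBeliefs;
    // Seed the queue with the grid points of the triangulated initial belief.
    uint64_t initialObservation = pomdp.getObservation(initialBelief.begin()->first);
    auto triangulation = grid.triangulateBelief(initialBelief, observationResolutions[initialObservation]);
    for (auto const& gridPointId : triangulation.gridPoints) {
        beliefsToBeExpanded.push_back(gridPointId);
    }
    while (!beliefsToBeExpanded.empty()) {
        uint64_t currId = beliefsToBeExpanded.front();
        beliefsToBeExpanded.pop_front();
        expandedBeliefs.grow(grid.getNumberOfGridPointIds(), false);
        if (expandedBeliefs.get(currId)) {
            continue; // already expanded via another predecessor
        }
        expandedBeliefs.set(currId, true);
        // All states in a belief's support share one observation, so any
        // support state determines the number of available actions.
        uint64_t someState = grid.getGridPoint(currId).begin()->first;
        for (uint64_t action = 0; action < pomdp.getNumberOfChoices(someState); ++action) {
            // expandAction triangulates every successor belief and returns a
            // distribution over (possibly fresh) grid point ids.
            for (auto const& successor : grid.expandAction(currId, action, observationResolutions)) {
                beliefsToBeExpanded.push_back(successor.first);
            }
        }
    }
}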