From 6fee61feb1628846d93617194246ba07604432fd Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Fri, 27 Mar 2020 15:11:52 +0100
Subject: [PATCH] POMDP: Started to split belief logic from exploration logic.

---
 .../ApproximatePOMDPModelchecker.cpp          | 343 ++++++++----------
 .../ApproximatePOMDPModelchecker.h            |   3 +-
 src/storm-pomdp/storage/BeliefGrid.h          | 296 +++++++++++++++
 3 files changed, 453 insertions(+), 189 deletions(-)
 create mode 100644 src/storm-pomdp/storage/BeliefGrid.h
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 1ff8d07ca..16704e4a4 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -1,5 +1,7 @@
 #include "ApproximatePOMDPModelchecker.h"
 
+#include <tuple>
+
 #include <boost/algorithm/string.hpp>
 
 #include "storm-pomdp/analysis/FormulaInformation.h"
@@ -21,6 +23,8 @@
 #include "storm/api/export.h"
 #include "storm-parsers/api/storm-parsers.h"
 
+#include "storm-pomdp/storage/BeliefGrid.h"
+
 #include "storm/utility/macros.h"
 #include "storm/utility/SignalHandler.h"
 #include "storm/exceptions/NotSupportedException.h"
@@ -322,6 +326,16 @@ namespace storm {
             }
 
 
+            
+            template <typename ValueType, typename BeliefType, typename SummandsType>
+            ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
+                ValueType result = storm::utility::zero<ValueType>();
+                for (auto const& entry : belief) {
+                    result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
+                }
+                return result;
+            }
+            
             template<typename ValueType, typename RewardModelType>
             std::shared_ptr<RefinementComponents<ValueType>>
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
@@ -338,15 +352,14 @@ namespace storm {
                     underMap = underApproximationMap.value();
                 }
 
-                std::vector<storm::pomdp::Belief<ValueType>> beliefList;
-                std::vector<bool> beliefIsTarget;
-                std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
+                storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>, ValueType> newBeliefGrid(pomdp, options.numericPrecision);
                 //Use caching to avoid multiple computation of the subsimplices and lambdas
                 std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> subSimplexCache;
                 std::map<uint64_t, std::vector<ValueType>> lambdaCache;
                 bsmap_type beliefStateMap;
 
                 std::deque<uint64_t> beliefsToBeExpanded;
+                storm::storage::BitVector expandedBeliefs;
 
                 // current ID -> action -> reward
                 std::map<uint64_t, std::vector<ValueType>> beliefActionRewards;
@@ -355,244 +368,185 @@ namespace storm {
                 // Initial belief always has belief ID 0
                 storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(nextId);
                 ++nextId;
-                beliefList.push_back(initialBelief);
-                beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
 
                 // These are the components to build the MDP from the grid
                 // Reserve states 0 and 1 as always sink/goal states
-                std::vector<std::vector<std::map<uint64_t, ValueType>>> mdpTransitions = {{{{0, storm::utility::one<ValueType>()}}},
-                                                                                          {{{1, storm::utility::one<ValueType>()}}}};
+                storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
+                uint64_t extraBottomState = 0;
+                uint64_t extraTargetState = computeRewards ? 0 : 1;
+                uint64_t nextMdpStateId = extraTargetState + 1;
+                uint64_t mdpMatrixRow = 0;
+                for (uint64_t state = 0; state < nextMdpStateId; ++state) {
+                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
+                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
+                    ++mdpMatrixRow;
+                }
                 // Hint vector for the MDP modelchecker (initialize with constant sink/goal values)
-                std::vector<ValueType> hintVector = {storm::utility::zero<ValueType>(), storm::utility::one<ValueType>()};
-                std::vector<uint64_t> targetStates = {1};
-                uint64_t mdpStateId = 2;
-
-                beliefStateMap.insert(bsmap_type::value_type(initialBelief.id, mdpStateId));
-                ++mdpStateId;
+                std::vector<ValueType> hintVector(nextMdpStateId, storm::utility::zero<ValueType>());
+                if (!computeRewards) {
+                    hintVector[extraTargetState] = storm::utility::one<ValueType>();
+                }
+                std::vector<uint64_t> targetStates = {extraTargetState};
 
                 // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace
                 std::map<uint64_t, ValueType> weightedSumOverMap;
                 std::map<uint64_t, ValueType> weightedSumUnderMap;
 
                 // for the initial belief, add the triangulated initial states
-                auto initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, observationResolutionVector[initialBelief.observation], pomdp.getNumberOfStates());
-                auto initSubSimplex = initTemp.first;
-                auto initLambdas = initTemp.second;
+                auto triangulation = newBeliefGrid.triangulateBelief(initialBelief.probabilities, observationResolutionVector[initialBelief.observation]);
                 if (options.cacheSubsimplices) {
-                    subSimplexCache[0] = initSubSimplex;
-                    lambdaCache[0] = initLambdas;
+                    //subSimplexCache[0] = initSubSimplex;
+                    //lambdaCache[0] = initLambdas;
                 }
                 std::vector<std::map<uint64_t, ValueType>> initTransitionsInBelief;
-                std::map<uint64_t, ValueType> initTransitionInActionBelief;
-                bool initInserted = false;
-                for (size_t j = 0; j < initLambdas.size(); ++j) {
-                    if (!cc.isEqual(initLambdas[j], storm::utility::zero<ValueType>())) {
-                        uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]);
-                        if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) {
-                            if (searchResult == 0) {
-                                // the initial belief is on the grid itself
-                                if (boundMapsSet) {
-                                    auto tempWeightedSumOver = storm::utility::zero<ValueType>();
-                                    auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
-                                    for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) {
-                                        tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
-                                        tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
-                                    }
-                                    weightedSumOverMap[initialBelief.id] = tempWeightedSumOver;
-                                    weightedSumUnderMap[initialBelief.id] = tempWeightedSumUnder;
-                                }
-                                initInserted = true;
-                                beliefGrid.push_back(initialBelief);
-                                beliefsToBeExpanded.push_back(0);
-                                hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
-                                                                                                                                    : storm::utility::zero<ValueType>());
-                            } else {
-                                // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list
-                                if (boundMapsSet) {
-                                    auto tempWeightedSumOver = storm::utility::zero<ValueType>();
-                                    auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
-                                    for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) {
-                                        tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
-                                        tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
-                                    }
-
-                                    weightedSumOverMap[nextId] = tempWeightedSumOver;
-                                    weightedSumUnderMap[nextId] = tempWeightedSumUnder;
-                                }
-
-                                storm::pomdp::Belief<ValueType> gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]};
-                                beliefList.push_back(gridBelief);
-                                beliefGrid.push_back(gridBelief);
-                                beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
-                                beliefsToBeExpanded.push_back(nextId);
-                                ++nextId;
-
-                                hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
+                uint64_t initialMdpState = nextMdpStateId;
+                ++nextMdpStateId;
+                if (triangulation.size() == 1) {
+                    // The initial belief is on the grid itself
+                    auto initBeliefId = triangulation.gridPoints.front();
+                    if (boundMapsSet) {
+                        auto const& gridPoint = newBeliefGrid.getGridPoint(initBeliefId);
+                        weightedSumOverMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, overMap);
+                        weightedSumUnderMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, underMap);
+                    }
+                    beliefsToBeExpanded.push_back(initBeliefId);
+                    beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState));
+                    hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
                                                                                                                                     : storm::utility::zero<ValueType>());
-
-                                beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId));
-                                initTransitionInActionBelief[mdpStateId] = initLambdas[j];
-                                ++nextId;
-                                ++mdpStateId;
-                            }
+                } else {
+                    // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
+                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
+                    for (uint64_t i = 0; i < triangulation.size(); ++i) {
+                        beliefsToBeExpanded.push_back(triangulation.gridPoints[i]);
+                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]);
+                        beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId));
+                        ++nextMdpStateId;
+                        if (boundMapsSet) {
+                            auto const& gridPoint = newBeliefGrid.getGridPoint(triangulation.gridPoints[i]);
+                            weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, overMap);
+                            weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, underMap);
                         }
+                        hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
+                                                                                                                                    : storm::utility::zero<ValueType>());
                     }
+                    //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
+                    ++mdpMatrixRow;
                 }
-
-                // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
-                if (!initTransitionInActionBelief.empty()) {
-                    initTransitionsInBelief.push_back(initTransitionInActionBelief);
-                    mdpTransitions.push_back(initTransitionsInBelief);
-                }
-                //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
-
+                
                 // Expand the beliefs to generate the grid on-the-fly
                 if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
                     STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
                 }
+                expandedBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false);
                 while (!beliefsToBeExpanded.empty()) {
                     uint64_t currId = beliefsToBeExpanded.front();
-                    beliefsToBeExpanded.pop_front();
-                    bool isTarget = beliefIsTarget[currId];
 
-                    if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
-                        // TODO: with rewards whe would have to assign the corresponding reward to this transition
-                        mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one<ValueType>() - weightedSumOverMap[currId]}}});
-                        continue;
-                    }
-
-                    if (isTarget) {
-                        // Depending on whether we compute rewards, we select the right initial result
-                        // MDP stuff
-                        targetStates.push_back(beliefStateMap.left.at(currId));
-                        mdpTransitions.push_back({{{beliefStateMap.left.at(currId), storm::utility::one<ValueType>()}}});
+                    beliefsToBeExpanded.pop_front();
+                    expandedBeliefs.set(currId, true); // Do not expand this belief again.
+                    assert(currId < expandedBeliefs.size());
+                    uint64_t currMdpState = beliefStateMap.left.at(currId);
+                    auto const& currBelief = newBeliefGrid.getGridPoint(currId);
+                    uint32_t currObservation = pomdp.getObservation(currBelief.begin()->first);
+                    
+                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
+                    
+                    if (targetObservations.count(currObservation) != 0) {
+                        // Make this state absorbing
+                        targetStates.push_back(currMdpState);
+                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
+                        ++mdpMatrixRow;
+                    } else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
+                        // TODO: with rewards we would have to assign the corresponding reward to this transition
+                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]);
+                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one<ValueType>() - weightedSumOverMap[currId]);
+                        ++mdpMatrixRow;
                     } else {
-                        uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front();
-                        uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
+                        auto const& currBelief = newBeliefGrid.getGridPoint(currId);
+                        uint64_t someState = currBelief.begin()->first;
+                        uint64_t numChoices = pomdp.getNumberOfChoices(someState);
                         std::vector<ValueType> actionRewardsInState(numChoices);
                         std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
 
                         for (uint64_t action = 0; action < numChoices; ++action) {
-                            std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currId], action);
-                            std::map<uint64_t, ValueType> transitionInActionBelief;
-                            for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
-                                uint32_t observation = iter->first;
-                                // THIS CALL IS SLOW
-                                // TODO speed this up
-                                uint64_t idNextBelief = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currId], action,
-                                                                                           observation, nextId);
-                                nextId = beliefList.size();
-                                //Triangulate here and put the possibly resulting belief in the grid
-                                std::vector<std::map<uint64_t, ValueType>> subSimplex;
-                                std::vector<ValueType> lambdas;
-                                if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) {
-                                    subSimplex = subSimplexCache[idNextBelief];
-                                    lambdas = lambdaCache[idNextBelief];
-                                } else {
-                                    auto temp = computeSubSimplexAndLambdas(beliefList[idNextBelief].probabilities,
-                                                                            observationResolutionVector[beliefList[idNextBelief].observation], pomdp.getNumberOfStates());
-                                    subSimplex = temp.first;
-                                    lambdas = temp.second;
-                                    if (options.cacheSubsimplices) {
-                                        subSimplexCache[idNextBelief] = subSimplex;
-                                        lambdaCache[idNextBelief] = lambdas;
-                                    }
-                                }
-
-                                for (size_t j = 0; j < lambdas.size(); ++j) {
-                                    if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
-                                        auto approxId = getBeliefIdInVector(beliefGrid, observation, subSimplex[j]);
-                                        if (approxId == uint64_t(-1)) {
-                                            // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list
-                                            storm::pomdp::Belief<ValueType> gridBelief = {nextId, observation, subSimplex[j]};
-                                            beliefList.push_back(gridBelief);
-                                            beliefGrid.push_back(gridBelief);
-                                            beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
-                                            // compute overapproximate value using MDP result map
-                                            if (boundMapsSet) {
-                                                auto tempWeightedSumOver = storm::utility::zero<ValueType>();
-                                                auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
-                                                for (uint64_t i = 0; i < subSimplex[j].size(); ++i) {
-                                                    tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
-                                                    tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
-                                                }
-                                                if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) {
-                                                    hintVector.push_back(tempWeightedSumOver);
-                                                } else {
-                                                    hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? storm::utility::one<ValueType>()
-                                                                                                                                          : storm::utility::zero<ValueType>());
-                                                }
-                                                weightedSumOverMap[nextId] = tempWeightedSumOver;
-                                                weightedSumUnderMap[nextId] = tempWeightedSumUnder;
-                                            } else {
-                                                hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? storm::utility::one<ValueType>()
-                                                                                                                                      : storm::utility::zero<ValueType>());
-                                            }
-                                            beliefsToBeExpanded.push_back(nextId);
-                                            beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId));
-                                            transitionInActionBelief[mdpStateId] = iter->second * lambdas[j];
-                                            ++nextId;
-                                            ++mdpStateId;
+                            auto successorGridPoints = newBeliefGrid.expandAction(currId, action, observationResolutionVector);
+                            // Check for newly found grid points
+                            expandedBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false);
+                            for (auto const& successor : successorGridPoints) {
+                                auto successorId = successor.first;
+                                auto successorBelief = newBeliefGrid.getGridPoint(successorId);
+                                auto successorObservation = pomdp.getObservation(successorBelief.begin()->first);
+                                if (!expandedBeliefs.get(successorId)) {
+                                    beliefsToBeExpanded.push_back(successorId);
+                                    beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
+                                    ++nextMdpStateId;
+                                    
+                                    if (boundMapsSet) {
+                                        ValueType upperBound = getWeightedSum<ValueType>(successorBelief, overMap);
+                                        ValueType lowerBound = getWeightedSum<ValueType>(successorBelief, underMap);
+                                        if (cc.isEqual(upperBound, lowerBound)) {
+                                            hintVector.push_back(lowerBound);
                                         } else {
-                                            transitionInActionBelief[beliefStateMap.left.at(approxId)] = iter->second * lambdas[j];
+                                            hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
                                         }
+                                        weightedSumOverMap[successorId] = upperBound;
+                                        weightedSumUnderMap[successorId] = lowerBound;
+                                    } else {
+                                        hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
                                     }
                                 }
+                                auto successorMdpState = beliefStateMap.left.at(successorId);
+                                // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted.
+                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
                             }
-                            if (!transitionInActionBelief.empty()) {
-                                transitionsInBelief.push_back(transitionInActionBelief);
-                            }
+                            ++mdpMatrixRow;
                         }
-                        if (transitionsInBelief.empty()) {
-                            std::map<uint64_t, ValueType> transitionInActionBelief;
-                            transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one<ValueType>();
-                            transitionsInBelief.push_back(transitionInActionBelief);
-                        }
-                        mdpTransitions.push_back(transitionsInBelief);
                     }
                     if (storm::utility::resources::isTerminate()) {
                         statistics.overApproximationBuildAborted = true;
                         break;
                     }
                 }
-                statistics.overApproximationStates = mdpTransitions.size();
-                STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl);
+                statistics.overApproximationStates = nextMdpStateId;
                 STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
                 if (storm::utility::resources::isTerminate()) {
                     statistics.overApproximationBuildTime.stop();
                     return nullptr;
                 }
                 
-                storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size());
+                storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
                 mdpLabeling.addLabel("init");
                 mdpLabeling.addLabel("target");
-                mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBelief.id));
+                mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialMdpState));
                 for (auto targetState : targetStates) {
                     mdpLabeling.addLabelToState("target", targetState);
                 }
-                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling);
-                storm::models::sparse::Mdp<ValueType, RewardModelType> overApproxMdp(modelComponents);
+                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
+                for (uint64_t row = 0; row < modelComponents.transitionMatrix.getRowCount(); ++row) {
+                    if (!storm::utility::isOne(modelComponents.transitionMatrix.getRowSum(row))) {
+                        std::cout << "Row " << row << " does not sum up to one. " << modelComponents.transitionMatrix.getRowSum(row) << " instead" << std::endl;
+                    }
+                }
+                auto overApproxMdp = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
                 if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
+                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow));
                     for (auto const &iter : beliefStateMap.left) {
-                        auto currentBelief = beliefList[iter.first];
-                        auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
-                        for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) {
+                        auto currentBelief = newBeliefGrid.getGridPoint(iter.first);
+                        auto representativeState = currentBelief.begin()->first;
+                        for (uint64_t action = 0; action < overApproxMdp->getNumberOfChoices(representativeState); ++action) {
                             // Add the reward
-                            mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
-                                                                getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
-                                                                                     currentBelief));
+                            uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
+                            uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action));
+                            mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief));
                         }
                     }
-                    overApproxMdp.addRewardModel("std", mdpRewardModel);
-                    overApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
+                    overApproxMdp->addRewardModel("default", mdpRewardModel);
+                    overApproxMdp->restrictRewardModels(std::set<std::string>({"default"}));
                 }
                 statistics.overApproximationBuildTime.stop();
                 STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
-                overApproxMdp.printModelInformationToStream(std::cout);
+                overApproxMdp->printModelInformationToStream(std::cout);
 
-                auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
-                auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
+                auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(overApproxMdp);
                 std::string propertyString = computeRewards ? "R" : "P";
                 propertyString += min ? "min" : "max";
                 propertyString += "=? [F \"target\"]";
@@ -604,31 +558,34 @@ namespace storm {
                 auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
                 task.setHint(hintPtr);
                 statistics.overApproximationCheckTime.start();
-                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
+                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(overApproxMdp, task));
                 statistics.overApproximationCheckTime.stop();
                 if (storm::utility::resources::isTerminate() && !res) {
                     return nullptr;
                 }
                 STORM_LOG_ASSERT(res, "Result does not exist.");
-                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true)));
+                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true)));
                 auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
                 auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)];
 
                 STORM_PRINT("Time Overapproximation: " <<  statistics.overApproximationCheckTime << " seconds." << std::endl);
-                //auto underApprox = weightedSumUnderMap[initialBelief.id];
-                auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards,
-                                                                       maxUaModelSize);
                 STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
+                //auto underApprox = weightedSumUnderMap[initialBelief.id];
+                /* TODO: Enable under approx again:
+                auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize);
                 if (storm::utility::resources::isTerminate() && !underApproxComponents) {
                     return std::make_unique<RefinementComponents<ValueType>>(
                         RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
                 }
                 STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
-
                 return std::make_unique<RefinementComponents<ValueType>>(
                         RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
                                                         underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
                                                         underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
+                */
+                return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, storm::utility::zero<ValueType>(), overApproxResultMap,
+                                                                                                         {}, {}, {}, {}, beliefStateMap, {}, initialBelief.id});
+
             }
 
             template<typename ValueType, typename RewardModelType>
@@ -973,7 +930,8 @@ namespace storm {
                 std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
                 return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
             }
-
+            
+            
             template<typename ValueType, typename RewardModelType>
             std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
@@ -1307,7 +1265,16 @@ namespace storm {
             }
 
             template<typename ValueType, typename RewardModelType>
-            ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> &belief) {
+            ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief) {
+                auto result = storm::utility::zero<ValueType>();
+                for (auto const &probEntry : belief) {
+                    result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
+                }
+                return result;
+            }
+
+            template<typename ValueType, typename RewardModelType>
+            ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief) {
                 auto result = storm::utility::zero<ValueType>();
                 for (auto const &probEntry : belief.probabilities) {
                     result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 782220358..4820ea380 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -240,7 +240,8 @@ namespace storm {
                  * @param belief the belief in which the action is performed
                  * @return the reward earned by performing the action in the belief
                  */
-                ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> &belief);
+                ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief);
+                ValueType getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief);
 
                 struct Statistics {
                     Statistics();
diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefGrid.h
new file mode 100644
index 000000000..a75872ee4
--- /dev/null
+++ b/src/storm-pomdp/storage/BeliefGrid.h
@@ -0,0 +1,296 @@
+#pragma once
+
+#include <map>
+#include <boost/optional.hpp>
+//#include <boost/container/flat_map.hpp>
+
+#include "storm/utility/macros.h"
+#include "storm/exceptions/UnexpectedException.h"
+
+namespace storm {
+    namespace storage {
+        
+        template <typename PomdpType, typename BeliefValueType, typename StateType = uint64_t>
+        class BeliefGrid {
+        public:
+            
+            typedef typename PomdpType::ValueType ValueType;
+            //typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType
+            typedef std::map<StateType, BeliefValueType> BeliefType;
+            typedef uint64_t BeliefId;
+            
+            BeliefGrid(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
+                // Intentionally left empty
+            }
+            
+            struct Triangulation {
+                std::vector<BeliefId> gridPoints;
+                std::vector<BeliefValueType> weights;
+                uint64_t size() const {
+                    return weights.size();
+                }
+            };
+            
+            BeliefType const& getGridPoint(BeliefId const& id) const {
+                return gridPoints[id];
+            }
+            
+            BeliefId getIdOfGridPoint(BeliefType const& gridPoint) const {
+                auto idIt = gridPointToIdMap.find(gridPoint);
+                STORM_LOG_THROW(idIt != gridPointToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown grid state.");
+                return idIt->second;
+            }
+            
+            bool isEqual(BeliefType const& first, BeliefType const& second) const {
+                if (first.size() != second.size()) {
+                    return false;
+                }
+                auto secondIt = second.begin();
+                for (auto const& firstEntry : first) {
+                    if (firstEntry.first != secondIt->first) {
+                        return false;
+                    }
+                    if (!cc.isEqual(firstEntry.second, secondIt->second)) {
+                        return false;
+                    }
+                    ++secondIt;
+                }
+                return true;
+            }
+            
+            bool assertBelief(BeliefType const& belief) const {
+                BeliefValueType sum = storm::utility::zero<ValueType>();
+                boost::optional<uint32_t> observation;
+                for (auto const& entry : belief) {
+                    uintmax_t entryObservation = pomdp.getObservation(entry.first);
+                    if (observation) {
+                        if (observation.get() != entryObservation) {
+                            STORM_LOG_ERROR("Beliefsupport contains different observations.");
+                            return false;
+                        }
+                    } else {
+                        observation = entryObservation;
+                    }
+                    if (cc.isZero(entry.second)) {
+                        // We assume that beliefs only consider their support.
+                        STORM_LOG_ERROR("Zero belief probability.");
+                        return false;
+                    }
+                    if (cc.isLess(entry.second, storm::utility::zero<BeliefValueType>())) {
+                        STORM_LOG_ERROR("Negative belief probability.");
+                        return false;
+                    }
+                    if (cc.isLess(storm::utility::one<BeliefValueType>(), entry.second)) {
+                        STORM_LOG_ERROR("Belief probability greater than one.");
+                        return false;
+                    }
+                    sum += entry.second;
+                }
+                if (!cc.isOne(sum)) {
+                    STORM_LOG_ERROR("Belief does not sum up to one.");
+                    return false;
+                }
+                return true;
+            }
+            
+            bool assertTriangulation(BeliefType const& belief, Triangulation const& triangulation) const {
+                if (triangulation.weights.size() != triangulation.gridPoints.size()) {
+                    STORM_LOG_ERROR("Number of weights and points in triangulation does not match.");
+                    return false;
+                }
+                if (triangulation.size() == 0) {
+                    STORM_LOG_ERROR("Empty triangulation.");
+                    return false;
+                }
+                BeliefType triangulatedBelief;
+                BeliefValueType weightSum = storm::utility::zero<BeliefValueType>();
+                for (uint64_t i = 0; i < triangulation.weights.size(); ++i) {
+                    if (cc.isZero(triangulation.weights[i])) {
+                        STORM_LOG_ERROR("Zero weight in triangulation.");
+                        return false;
+                    }
+                    if (cc.isLess(triangulation.weights[i], storm::utility::zero<BeliefValueType>())) {
+                        STORM_LOG_ERROR("Negative weight in triangulation.");
+                        return false;
+                    }
+                    if (cc.isLess(storm::utility::one<BeliefValueType>(), triangulation.weights[i])) {
+                        STORM_LOG_ERROR("Weight greater than one in triangulation.");
+                    }
+                    weightSum += triangulation.weights[i];
+                    BeliefType const& gridPoint = getGridPoint(triangulation.gridPoints[i]);
+                    for (auto const& pointEntry : gridPoint) {
+                        BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero<ValueType>()).first->second;
+                        triangulatedValue += triangulation.weights[i] * pointEntry.second;
+                    }
+                }
+                if (!cc.isOne(weightSum)) {
+                    STORM_LOG_ERROR("Triangulation weights do not sum up to one.");
+                    return false;
+                }
+                if (!assertBelief(triangulatedBelief)) {
+                    STORM_LOG_ERROR("Triangulated belief is not a belief.");
+                }
+                if (!isEqual(belief, triangulatedBelief)) {
+                    STORM_LOG_ERROR("Belief does not match triangulated belief.");
+                    return false;
+                }
+                return true;
+            }
+            
+            Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
+                //TODO this can also be simplified using the sparse vector interpretation
+                //TODO Enable chaching for this method?
+                STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid.");
+                
+                auto nrStates = pomdp.getNumberOfStates();
+                
+                // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
+                // Variable names are based on the paper
+                // TODO avoid reallocations for these vectors
+                std::vector<BeliefValueType> x(nrStates);
+                std::vector<BeliefValueType> v(nrStates);
+                std::vector<BeliefValueType> d(nrStates);
+                auto convResolution = storm::utility::convertNumber<BeliefValueType>(resolution);
+
+                for (size_t i = 0; i < nrStates; ++i) {
+                    for (auto const &probEntry : belief) {
+                        if (probEntry.first >= i) {
+                            x[i] += convResolution * probEntry.second;
+                        }
+                    }
+                    v[i] = storm::utility::floor(x[i]);
+                    d[i] = x[i] - v[i];
+                }
+
+                auto p = storm::utility::vector::getSortedIndices(d);
+
+                std::vector<std::vector<BeliefValueType>> qs(nrStates, std::vector<BeliefValueType>(nrStates));
+                for (size_t i = 0; i < nrStates; ++i) {
+                    if (i == 0) {
+                        for (size_t j = 0; j < nrStates; ++j) {
+                            qs[i][j] = v[j];
+                        }
+                    } else {
+                        for (size_t j = 0; j < nrStates; ++j) {
+                            if (j == p[i - 1]) {
+                                qs[i][j] = qs[i - 1][j] + storm::utility::one<BeliefValueType>();
+                            } else {
+                                qs[i][j] = qs[i - 1][j];
+                            }
+                        }
+                    }
+                }
+                
+                Triangulation result;
+                // The first weight is 1-sum(other weights). We therefore process the js in reverse order
+                BeliefValueType firstWeight = storm::utility::one<BeliefValueType>();
+                for (size_t j = nrStates; j > 0;) {
+                    --j;
+                    // First create the weights. The weights vector will be reversed at the end.
+                    ValueType weight;
+                    if (j == 0) {
+                        weight = firstWeight;
+                    } else {
+                        weight = d[p[j - 1]] - d[p[j]];
+                        firstWeight -= weight;
+                    }
+                    if (!cc.isZero(weight)) {
+                        result.weights.push_back(weight);
+                        BeliefType gridPoint;
+                        auto const& qsj = qs[j];
+                        for (size_t i = 0; i < nrStates - 1; ++i) {
+                            BeliefValueType gridPointEntry = qsj[i] - qsj[i + 1];
+                            if (!cc.isZero(gridPointEntry)) {
+                                gridPoint[i] = gridPointEntry / convResolution;
+                            }
+                        }
+                        if (!cc.isZero(qsj[nrStates - 1])) {
+                            gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution;
+                        }
+                        result.gridPoints.push_back(getOrAddGridPointId(gridPoint));
+                    }
+                }
+                std::reverse(result.weights.begin(), result.weights.end());
+                
+                STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation.");
+                
+                return result;
+            }
+            
+            template<typename DistributionType>
+            void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) {
+                auto insertionRes = distr.emplace(state, value);
+                if (!insertionRes.second) {
+                    insertionRes.first->second += value;
+                }
+            }
+            
+            BeliefId getNumberOfGridPointIds() const {
+                return gridPoints.size();
+            }
+            
+            std::map<BeliefId, ValueType> expandAction(BeliefId const& gridPointId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
+                
+                std::map<BeliefId, ValueType> destinations; // The belief ids should be ordered
+                
+                BeliefType gridPoint = getGridPoint(gridPointId);
+                
+                // Find the probability we go to each observation
+                BeliefType successorObs; // This is actually not a belief but has the same type
+                for (auto const& pointEntry : gridPoint) {
+                    uint64_t state = pointEntry.first;
+                    for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
+                        if (!storm::utility::isZero(pomdpTransition.getValue())) {
+                            auto obs = pomdp.getObservation(pomdpTransition.getColumn());
+                            addToDistribution(successorObs, obs, pointEntry.second * pomdpTransition.getValue());
+                        }
+                    }
+                }
+                
+                // Now for each successor observation we find and triangulate the successor belief
+                for (auto const& successor : successorObs) {
+                    BeliefType successorBelief;
+                    for (auto const& pointEntry : gridPoint) {
+                        uint64_t state = pointEntry.first;
+                        for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
+                            if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) {
+                                ValueType prob = pointEntry.second * pomdpTransition.getValue() / successor.second;
+                                addToDistribution(successorBelief, pomdpTransition.getColumn(), prob);
+                            }
+                        }
+                    }
+                    STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief.");
+                    
+                    Triangulation triangulation = triangulateBelief(successorBelief, observationResolutions[successor.first]);
+                    for (size_t j = 0; j < triangulation.size(); ++j) {
+                        addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
+                    }
+                }
+    
+                return destinations;
+                
+            }
+            
+        private:
+            
+            BeliefId getOrAddGridPointId(BeliefType const& gridPoint) {
+                auto insertioRes = gridPointToIdMap.emplace(gridPoint, gridPoints.size());
+                if (insertioRes.second) {
+                    // There actually was an insertion, so add the new grid state
+                    gridPoints.push_back(gridPoint);
+                }
+                // Return the id
+                return insertioRes.first->second;
+            }
+            
+            PomdpType const& pomdp;
+            uint64_t resolution;
+            
+            std::vector<BeliefType> gridPoints;
+            std::map<BeliefType, BeliefId> gridPointToIdMap;
+            storm::utility::ConstantsComparator<ValueType> cc;
+            
+            
+        };
+    }
+}
\ No newline at end of file