From 0facf4a5726f1edf3e2a7896e7f5320c50aead65 Mon Sep 17 00:00:00 2001
From: Alexander Bork <alexander.bork@rwth-aachen.de>
Date: Tue, 17 Dec 2019 10:52:56 +0100
Subject: [PATCH] Preparation work for the implementation of the refinement
 procedure

---
 src/storm-pomdp-cli/storm-pomdp.cpp           |  22 +-
 .../ApproximatePOMDPModelchecker.cpp          | 205 ++++++++++--------
 .../ApproximatePOMDPModelchecker.h            |  47 +++-
 3 files changed, 164 insertions(+), 110 deletions(-)
diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp
index 8b28da0bf..7cbd84592 100644
--- a/src/storm-pomdp-cli/storm-pomdp.cpp
+++ b/src/storm-pomdp-cli/storm-pomdp.cpp
@@ -197,15 +197,15 @@ int main(const int argc, const char** argv) {
                     STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!");
 
                     storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double> checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double>();
-                    double overRes = storm::utility::one<double>();
-                    double underRes = storm::utility::zero<double>();
+                    auto overRes = storm::utility::one<double>();
+                    auto underRes = storm::utility::zero<double>();
                     std::unique_ptr<storm::pomdp::modelchecker::POMDPCheckResult<double>> result;
 
-                    //result = checker.refineReachabilityProbability(*pomdp, targetObservationSet,probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, pomdpSettings.getGridResolution(),1,10);
-                    result = checker.computeReachabilityProbabilityOTF(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize,
-                                                                       pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold());
-                    overRes = result->OverapproximationValue;
-                    underRes = result->UnderapproximationValue;
+                    result = checker.refineReachabilityProbability(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize,
+                                                                   pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold());
+                    //result = checker.computeReachabilityProbabilityOTF(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold());
+                    overRes = result->overApproxValue;
+                    underRes = result->underApproxValue;
                     if (overRes != underRes) {
                         STORM_PRINT("Overapproximation Result: " << overRes << std::endl)
                         STORM_PRINT("Underapproximation Result: " << underRes << std::endl)
@@ -264,15 +264,15 @@ int main(const int argc, const char** argv) {
                     STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!");
 
                     storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double> checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double>();
-                    double overRes = storm::utility::one<double>();
-                    double underRes = storm::utility::zero<double>();
+                    auto overRes = storm::utility::one<double>();
+                    auto underRes = storm::utility::zero<double>();
                     std::unique_ptr<storm::pomdp::modelchecker::POMDPCheckResult<double>> result;
                     result = checker.computeReachabilityReward(*pomdp, targetObservationSet,
                                                                rewFormula.getOptimalityType() ==
                                                                storm::OptimizationDirection::Minimize,
                                                                pomdpSettings.getGridResolution());
-                    overRes = result->OverapproximationValue;
-                    underRes = result->UnderapproximationValue;
+                    overRes = result->overApproxValue;
+                    underRes = result->underApproxValue;
                 }
 
             }
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index fd8502563..550724792 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -30,31 +30,12 @@ namespace storm {
             template<typename ValueType, typename RewardModelType>
             std::unique_ptr<POMDPCheckResult<ValueType>>
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::refineReachabilityProbability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                                                                                    std::set<uint32_t> const &targetObservations, bool min,
-                                                                                                    uint64_t startingResolution, uint64_t stepSize, uint64_t maxNrOfRefinements) {
-                uint64_t currentResolution = startingResolution;
-                uint64_t currentRefinement = 0;
-                std::unique_ptr<POMDPCheckResult<ValueType>> res = std::make_unique<POMDPCheckResult<ValueType>>(
-                        POMDPCheckResult<ValueType>{storm::utility::one<ValueType>(), storm::utility::zero<ValueType>()});
-                while (currentRefinement < maxNrOfRefinements && !cc.isEqual(storm::utility::zero<ValueType>(), res->OverapproximationValue - res->UnderapproximationValue)) {
-                    STORM_PRINT("--------------------------------------------------------------" << std::endl)
-                    STORM_PRINT("Refinement Step " << currentRefinement + 1 << " - Resolution " << currentResolution << std::endl)
-                    STORM_PRINT("--------------------------------------------------------------" << std::endl)
-                    res = computeReachabilityProbability(pomdp, targetObservations, min, currentResolution);
-                    currentResolution += stepSize;
-                    ++currentRefinement;
-                }
-                STORM_PRINT("Procedure took " << currentRefinement << " refinement steps" << std::endl)
-                return res;
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<POMDPCheckResult<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                                                                             std::set<uint32_t> const &targetObservations, bool min, uint64_t gridResolution,
-                                                                                             bool computeRewards, double explorationThreshold) {
-                //TODO For the prototypical implementation, I put the refinement loop here. I'll change this later on
+                                                                                                    std::set<uint32_t> const &targetObservations, bool min, uint64_t gridResolution,
+                                                                                                    double explorationThreshold) {
+                std::srand(time(NULL));
+                // Compute easy upper and lower bounds
                 storm::utility::Stopwatch underlyingWatch(true);
+                // Compute the results on the underlying MDP as a basic overapproximation
                 storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling());
                 underlyingMdpLabeling.addLabel("goal");
                 std::vector<uint64_t> goalStates;
@@ -66,8 +47,7 @@ namespace storm {
                 storm::models::sparse::Mdp<ValueType, RewardModelType> underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels());
                 auto underlyingModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(
                         std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underlyingMdp));
-                std::string initPropString = computeRewards ? "R" : "P";
-                initPropString += min ? "min" : "max";
+                std::string initPropString = min ? "Pmin" : "Pmax";
                 initPropString += "=? [F \"goal\"]";
                 std::vector<storm::jani::Property> propVector = storm::api::parseProperties(initPropString);
                 std::shared_ptr<storm::logic::Formula const> underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front();
@@ -76,11 +56,11 @@ namespace storm {
                 std::unique_ptr<storm::modelchecker::CheckResult> underlyingRes(storm::api::verifyWithSparseEngine<ValueType>(underlyingModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
                 STORM_LOG_ASSERT(underlyingRes, "Result not exist.");
                 underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true)));
-                auto mdpResultMap = underlyingRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
+                auto overApproxMap = underlyingRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
                 underlyingWatch.stop();
 
                 storm::utility::Stopwatch positionalWatch(true);
-                // we define some positional scheduler for the POMDP as an experimental lower bound
+                // we define some positional scheduler for the POMDP as a basic lower bound
                 storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
                 for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) {
                     auto obsStates = pomdp.getStatesWithObservation(obs);
@@ -97,20 +77,62 @@ namespace storm {
                         storm::api::verifyWithSparseEngine<ValueType>(underApproxModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
                 STORM_LOG_ASSERT(underapproxRes, "Result not exist.");
                 underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true)));
-                auto mdpUnderapproxResultMap = underapproxRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
+                auto underApproxMap = underapproxRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
                 positionalWatch.stop();
 
                 STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl)
 
+                // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution.
+                // This can probably be improved (i.e. resolutions for single belief states)
+                STORM_PRINT("Initial Resolution: " << gridResolution << std::endl)
+                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), gridResolution);
+                auto overRes = storm::utility::one<ValueType>();
+                auto underRes = storm::utility::zero<ValueType>();
+                uint64_t refinementCounter = 1;
+                std::unique_ptr<POMDPCheckResult<ValueType>> res = computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold,
+                                                                                          overApproxMap, underApproxMap);
+                // TODO the actual refinement
+                return res;
+            }
+
+            template<typename ValueType, typename RewardModelType>
+            std::unique_ptr<POMDPCheckResult<ValueType>>
+            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                                                             std::set<uint32_t> const &targetObservations, bool min,
+                                                                                             std::vector<uint64_t> &observationResolutionVector,
+                                                                                             bool computeRewards, double explorationThreshold,
+                                                                                             boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
+                                                                                             boost::optional<std::map<uint64_t, ValueType>> underApproximationMap) {
                 STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
+                auto result = computeRefinementFirstStep(pomdp, targetObservations, min, observationResolutionVector, computeRewards, explorationThreshold, overApproximationMap,
+                                                         underApproximationMap);
+                return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->overApproxValue, result->underApproxValue});
+            }
+
+
+            template<typename ValueType, typename RewardModelType>
+            std::unique_ptr<RefinementComponents<ValueType>>
+            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeRefinementFirstStep(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                                                                 std::set<uint32_t> const &targetObservations, bool min,
+                                                                                                 std::vector<uint64_t> &observationResolutionVector,
+                                                                                                 bool computeRewards, double explorationThreshold,
+                                                                                                 boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
+                                                                                                 boost::optional<std::map<uint64_t, ValueType>> underApproximationMap) {
+                bool boundMapsSet = overApproximationMap && underApproximationMap;
+                std::map<uint64_t, ValueType> overMap;
+                std::map<uint64_t, ValueType> underMap;
+                if (boundMapsSet) {
+                    overMap = overApproximationMap.value();
+                    underMap = underApproximationMap.value();
+                }
+
                 std::vector<storm::pomdp::Belief<ValueType>> beliefList;
                 std::vector<bool> beliefIsTarget;
                 std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
-                std::map<uint64_t, ValueType> result;
                 //Use caching to avoid multiple computation of the subsimplices and lambdas
                 std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
                 std::map<uint64_t, std::vector<ValueType>> lambdaCache;
-                std::map<uint64_t, std::vector<uint64_t>> chosenActions;
+                std::map<uint64_t, uint64_t> beliefStateMap;
 
                 std::deque<uint64_t> beliefsToBeExpanded;
 
@@ -128,10 +150,11 @@ namespace storm {
                 beliefList.push_back(initialBelief);
                 beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
 
-                // These are the components to build the MDP from the grid TODO make a better stucture to allow for fast reverse lookups (state-->belief) as it is a bijective function (boost:bimap?)
-                std::map<uint64_t, uint64_t> beliefStateMap;
+                // These are the components to build the MDP from the grid TODO make a better structure to allow for fast reverse lookups (state-->belief) as it is a bijective function (boost:bimap?)
+
                 // Reserve states 0 and 1 as always sink/goal states
-                std::vector<std::vector<std::map<uint64_t, ValueType>>> mdpTransitions = {{{{0, storm::utility::one<ValueType>()}}},{{{1, storm::utility::one<ValueType>()}}}};
+                std::vector<std::vector<std::map<uint64_t, ValueType>>> mdpTransitions = {{{{0, storm::utility::one<ValueType>()}}},
+                                                                                          {{{1, storm::utility::one<ValueType>()}}}};
                 // Hint vector for the MDP modelchecker (initialize with constant sink/goal values)
                 std::vector<ValueType> hintVector = {storm::utility::zero<ValueType>(), storm::utility::one<ValueType>()};
                 std::vector<uint64_t> targetStates = {1};
@@ -145,7 +168,8 @@ namespace storm {
                 std::map<uint64_t, ValueType> weightedSumUnderMap;
 
                 // for the initial belief, add the triangulated initial states
-                std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution);
+                std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities,
+                                                                                                                              observationResolutionVector[initialBelief.observation]);
                 std::vector<std::vector<ValueType>> initSubSimplex = initTemp.first;
                 std::vector<ValueType> initLambdas = initTemp.second;
                 if(cacheSubsimplices){
@@ -161,15 +185,16 @@ namespace storm {
                         if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) {
                             if (searchResult == 0) {
                                 // the initial belief is on the grid itself
-                                auto tempWeightedSumOver = storm::utility::zero<ValueType>();
-                                auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
-                                for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) {
-                                    tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(mdpResultMap[i]);
-                                    tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(mdpUnderapproxResultMap[i]);
+                                if (boundMapsSet) {
+                                    auto tempWeightedSumOver = storm::utility::zero<ValueType>();
+                                    auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
+                                    for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) {
+                                        tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
+                                        tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
+                                    }
+                                    weightedSumOverMap[initialBelief.id] = tempWeightedSumOver;
+                                    weightedSumUnderMap[initialBelief.id] = tempWeightedSumUnder;
                                 }
-                                weightedSumOverMap[initialBelief.id] = tempWeightedSumOver;
-                                weightedSumUnderMap[initialBelief.id] = tempWeightedSumUnder;
-
                                 initInserted = true;
                                 beliefGrid.push_back(initialBelief);
                                 beliefsToBeExpanded.push_back(0);
@@ -177,17 +202,18 @@ namespace storm {
                                                                                                                                     : storm::utility::zero<ValueType>());
                             } else {
                                 // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list
+                                if (boundMapsSet) {
+                                    auto tempWeightedSumOver = storm::utility::zero<ValueType>();
+                                    auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
+                                    for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) {
+                                        tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
+                                        tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
+                                    }
 
-                                auto tempWeightedSumOver = storm::utility::zero<ValueType>();
-                                auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
-                                for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) {
-                                    tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(mdpResultMap[i]);
-                                    tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(mdpUnderapproxResultMap[i]);
+                                    weightedSumOverMap[nextId] = tempWeightedSumOver;
+                                    weightedSumUnderMap[nextId] = tempWeightedSumUnder;
                                 }
 
-                                weightedSumOverMap[nextId] = tempWeightedSumOver;
-                                weightedSumUnderMap[nextId] = tempWeightedSumUnder;
-
                                 storm::pomdp::Belief<ValueType> gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]};
                                 beliefList.push_back(gridBelief);
                                 beliefGrid.push_back(gridBelief);
@@ -225,16 +251,13 @@ namespace storm {
                     beliefsToBeExpanded.pop_front();
                     bool isTarget = beliefIsTarget[currId];
 
-                    if(cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber<ValueType>(explorationThreshold))){
-                        result.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero<ValueType>() : weightedSumOverMap[currId]));
-                        mdpTransitions.push_back({{{1, weightedSumOverMap[currId]},{0, storm::utility::one<ValueType>() - weightedSumOverMap[currId]}}});
+                    if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber<ValueType>(explorationThreshold))) {
+                        mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one<ValueType>() - weightedSumOverMap[currId]}}});
                         continue;
                     }
 
                     if (isTarget) {
                         // Depending on whether we compute rewards, we select the right initial result
-                        result.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero<ValueType>() : storm::utility::one<ValueType>()));
-
                         // MDP stuff
                         std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
                         targetStates.push_back(beliefStateMap[currId]);
@@ -243,8 +266,6 @@ namespace storm {
                         transitionsInBelief.push_back(transitionInActionBelief);
                         mdpTransitions.push_back(transitionsInBelief);
                     } else {
-                        result.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
-
                         uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front();
                         uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
                         std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
@@ -271,7 +292,8 @@ namespace storm {
                                     subSimplex = subSimplexCache[idNextBelief];
                                     lambdas = lambdaCache[idNextBelief];
                                 } else {
-                                    std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(beliefList[idNextBelief].probabilities, gridResolution);
+                                    std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
+                                            beliefList[idNextBelief].probabilities, observationResolutionVector[beliefList[idNextBelief].observation]);
                                     subSimplex = temp.first;
                                     lambdas = temp.second;
                                     if(cacheSubsimplices){
@@ -287,25 +309,28 @@ namespace storm {
                                             storm::pomdp::Belief<ValueType> gridBelief = {nextId, observation, subSimplex[j]};
                                             beliefList.push_back(gridBelief);
                                             beliefGrid.push_back(gridBelief);
-                                            // compute overapproximate value using MDP result map
-                                            auto tempWeightedSumOver = storm::utility::zero<ValueType>();
-                                            auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
-                                            for (uint64_t i = 0; i < subSimplex[j].size(); ++i) {
-                                                tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(mdpResultMap[i]);
-                                                tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(mdpUnderapproxResultMap[i]);
-                                            }
                                             beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
-
-                                            if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) {
-                                                hintVector.push_back(tempWeightedSumOver);
+                                            // compute overapproximate value using MDP result map
+                                            if (boundMapsSet) {
+                                                auto tempWeightedSumOver = storm::utility::zero<ValueType>();
+                                                auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
+                                                for (uint64_t i = 0; i < subSimplex[j].size(); ++i) {
+                                                    tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
+                                                    tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
+                                                }
+                                                if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) {
+                                                    hintVector.push_back(tempWeightedSumOver);
+                                                } else {
+                                                    hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? storm::utility::one<ValueType>()
+                                                                                                                                          : storm::utility::zero<ValueType>());
+                                                }
+                                                weightedSumOverMap[nextId] = tempWeightedSumOver;
+                                                weightedSumUnderMap[nextId] = tempWeightedSumUnder;
                                             } else {
-                                                hintVector.push_back(storm::utility::zero<ValueType>());
+                                                hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? storm::utility::one<ValueType>()
+                                                                                                                                      : storm::utility::zero<ValueType>());
                                             }
-
                                             beliefsToBeExpanded.push_back(nextId);
-                                            weightedSumOverMap[nextId] = tempWeightedSumOver;
-                                            weightedSumUnderMap[nextId] = tempWeightedSumUnder;
-
                                             beliefStateMap[nextId] = mdpStateId;
                                             transitionInActionBelief[mdpStateId] = iter->second * lambdas[j];
                                             ++nextId;
@@ -343,11 +368,8 @@ namespace storm {
                 }
                 expansionTimer.stop();
                 STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl)
-                STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl)
                 STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl)
 
-                //auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, subSimplexCache, lambdaCache,result, chosenActions, gridResolution, min, computeRewards);
-
                 storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size());
                 mdpLabeling.addLabel("init");
                 mdpLabeling.addLabel("target");
@@ -376,9 +398,6 @@ namespace storm {
 
                 auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
                 auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
-                std::vector<std::string> parameterNames;
-                storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames);
-
                 std::string propertyString = computeRewards ? "R" : "P";
                 propertyString += min ? "min" : "max";
                 propertyString += "=? [F \"target\"]";
@@ -394,24 +413,22 @@ namespace storm {
                 overApproxTimer.stop();
                 STORM_LOG_ASSERT(res, "Result not exist.");
                 res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true)));
-                auto resultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                auto overApprox = resultMap[beliefStateMap[initialBelief.id]];
-                /* storm::utility::Stopwatch underApproxTimer(true);
-                ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
-                                                                         result, chosenActions, gridResolution, initialBelief.id, min, computeRewards);
-                underApproxTimer.stop();*/
+                auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
+                auto overApprox = overApproxResultMap[beliefStateMap[initialBelief.id]];
 
                 STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl)
-                auto underApprox = storm::utility::zero<ValueType>();
+                auto underApprox = weightedSumUnderMap[initialBelief.id];
                 STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
                 STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
 
-                std::map<uint64_t, ValueType> differences;
-                for(auto const &entry : weightedSumUnderMap){
-                    differences[beliefStateMap[entry.first]] = resultMap[beliefStateMap[entry.first]] - weightedSumUnderMap[entry.first];
+                // Transfer the underapproximation results from the belief id space to the MDP id space
+                std::map<uint64_t, ValueType> underApproxResultMap;
+                for (auto const &belief : beliefGrid) {
+                    underApproxResultMap[beliefStateMap[belief.id]] = weightedSumUnderMap[belief.id];
                 }
 
-                return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{overApprox, underApprox});
+                return std::make_unique<RefinementComponents<ValueType>>(
+                        RefinementComponents<ValueType>{modelPtr, overApprox, underApprox, overApproxResultMap, underApproxResultMap, beliefList, beliefIsTarget, beliefStateMap});
             }
 
             template<typename ValueType, typename RewardModelType>
@@ -536,7 +553,8 @@ namespace storm {
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                                                                    std::set<uint32_t> const &targetObservations, bool min,
                                                                                                    uint64_t gridResolution) {
-                return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, true, 0);
+                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), gridResolution);
+                return computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, true, 0);
             }
 
             template<typename ValueType, typename RewardModelType>
@@ -544,7 +562,8 @@ namespace storm {
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                                                                         std::set<uint32_t> const &targetObservations, bool min,
                                                                                                         uint64_t gridResolution, double explorationThreshold) {
-                return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, false, explorationThreshold);
+                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), gridResolution);
+                return computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold);
             }
 
             template<typename ValueType, typename RewardModelType>
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 438354eca..f61c0f9a6 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -11,8 +11,24 @@ namespace storm {
         namespace modelchecker {
             template<class ValueType>
             struct POMDPCheckResult {
-                ValueType OverapproximationValue;
-                ValueType UnderapproximationValue;
+                ValueType overApproxValue;
+                ValueType underApproxValue;
+            };
+
+            /**
+             *  Struct containing information which is supposed to be persistent over multiple refinement steps
+             *
+             */
+            template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
+            struct RefinementComponents {
+                std::shared_ptr<storm::models::sparse::Model<ValueType, RewardModelType>> overApproxModelPtr;
+                ValueType overApproxValue;
+                ValueType underApproxValue;
+                std::map<uint64_t, ValueType> &overApproxMap;
+                std::map<uint64_t, ValueType> &underApproxMap;
+                std::vector<storm::pomdp::Belief<ValueType>> &beliefList;
+                std::vector<bool> &beliefIsTarget;
+                std::map<uint64_t, uint64_t> &beliefStateMap;
             };
 
             template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
@@ -21,9 +37,8 @@ namespace storm {
                 explicit ApproximatePOMDPModelchecker();
 
                 std::unique_ptr<POMDPCheckResult<ValueType>>
-                refineReachabilityProbability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                               std::set<uint32_t> const &targetObservations, bool min,
-                                               uint64_t startingResolution, uint64_t stepSize, uint64_t maxNrOfRefinements);
+                refineReachabilityProbability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, std::set<uint32_t> const &targetObservations, bool min,
+                                              uint64_t gridResolution, double explorationThreshold);
 
                 std::unique_ptr<POMDPCheckResult<ValueType>>
                 computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -45,6 +60,24 @@ namespace storm {
                                           uint64_t gridResolution);
 
             private:
+                /**
+                 *
+                 * @param pomdp
+                 * @param targetObservations
+                 * @param min
+                 * @param observationResolutionVector
+                 * @param computeRewards
+                 * @param explorationThreshold
+                 * @param overApproximationMap
+                 * @param underApproximationMap
+                 * @return
+                 */
+                std::unique_ptr<RefinementComponents<ValueType>>
+                computeRefinementFirstStep(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                           std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
+                                           bool computeRewards, double explorationThreshold, boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
+                                           boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none);
+
                 /**
                  *
                  * @param pomdp
@@ -57,7 +90,9 @@ namespace storm {
                 std::unique_ptr<POMDPCheckResult<ValueType>>
                 computeReachabilityOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                        std::set<uint32_t> const &targetObservations, bool min,
-                                       uint64_t gridResolution, bool computeRewards, double explorationThreshold);
+                                       std::vector<uint64_t> &observationResolutionVector, bool computeRewards, double explorationThreshold,
+                                       boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
+                                       boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none);
 
                 /**
                  *