From 48395f121884256fe2982afb6e986b027a48b91a Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Sun, 29 Mar 2020 16:11:24 +0200
Subject: [PATCH 01/40] Cmake: Fixed capitalization of z3 and hwloc to silence
 some cmake warnings that recently appeared.

---
 resources/3rdparty/CMakeLists.txt                               | 2 +-
 .../cmake/find_modules/{FindHwloc.cmake => FindHWLOC.cmake}     | 0
 resources/cmake/find_modules/FindZ3.cmake                       | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename resources/cmake/find_modules/{FindHwloc.cmake => FindHWLOC.cmake} (100%)

diff --git a/resources/3rdparty/CMakeLists.txt b/resources/3rdparty/CMakeLists.txt
index 46e927d85..acd1fefd1 100644
--- a/resources/3rdparty/CMakeLists.txt
+++ b/resources/3rdparty/CMakeLists.txt
@@ -496,7 +496,7 @@ add_dependencies(sylvan_STATIC sylvan)
 
 list(APPEND STORM_DEP_TARGETS sylvan_STATIC)
 
-find_package(Hwloc QUIET REQUIRED)
+find_package(HWLOC QUIET REQUIRED)
 if(HWLOC_FOUND)
     message(STATUS "Storm - Linking with hwloc ${HWLOC_VERSION}.")
     add_imported_library(hwloc STATIC ${HWLOC_LIBRARIES} "")
diff --git a/resources/cmake/find_modules/FindHwloc.cmake b/resources/cmake/find_modules/FindHWLOC.cmake
similarity index 100%
rename from resources/cmake/find_modules/FindHwloc.cmake
rename to resources/cmake/find_modules/FindHWLOC.cmake
diff --git a/resources/cmake/find_modules/FindZ3.cmake b/resources/cmake/find_modules/FindZ3.cmake
index d0978b3bc..6c6a7beb1 100644
--- a/resources/cmake/find_modules/FindZ3.cmake
+++ b/resources/cmake/find_modules/FindZ3.cmake
@@ -32,7 +32,7 @@ set(Z3_SOLVER ${Z3_EXEC})
 # set the LIBZ3_FOUND variable by utilizing the following macro
 # (which also handles the REQUIRED and QUIET arguments)
 include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(z3 DEFAULT_MSG
+find_package_handle_standard_args(Z3 DEFAULT_MSG
                                   Z3_LIBRARY Z3_INCLUDE_DIR)
 
 IF (NOT Z3_FIND_QUIETLY)

From 1603f0569de88a81d74d1efb43021aad8ddaf3c3 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Sun, 29 Mar 2020 20:27:45 +0200
Subject: [PATCH 02/40] Silenced a gcc warning.

---
 src/storm-cli-utilities/model-handling.h | 1 +
 1 file changed, 1 insertion(+)
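
The warning in question is gcc's -Wreturn-type: a switch over an enum class
is not treated as exhaustive even when all enumerators are handled, so a
function whose only returns sit inside the switch appears to fall off the
end. A minimal sketch of the pattern (hypothetical code, not from Storm):

    enum class Precision { Exact, Finite };

    bool canHandle(Precision p) {
        switch (p) {
            case Precision::Exact:  return true;
            case Precision::Finite: return false;
        }
        return false; // unreachable in practice, but silences -Wreturn-type
    }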

diff --git a/src/storm-cli-utilities/model-handling.h b/src/storm-cli-utilities/model-handling.h
index 0e4fe4812..6dad6fbb0 100644
--- a/src/storm-cli-utilities/model-handling.h
+++ b/src/storm-cli-utilities/model-handling.h
@@ -276,6 +276,7 @@ namespace storm {
                         case ModelProcessingInformation::ValueType::FinitePrecision:
                             return storm::utility::canHandle<double>(mpi.engine, input.preprocessedProperties.is_initialized() ? input.preprocessedProperties.get() : input.properties, input.model.get());
                     }
+                    return false;
                 };
                 mpi.isCompatible = checkCompatibleSettings();
                 if (!mpi.isCompatible) {

From dc7aabc2f1c8a5dd8dd55ba6fcc09bd0e55a593e Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Sun, 29 Mar 2020 20:28:26 +0200
Subject: [PATCH 03/40] Fixed moving a referenced object away.

---
 src/storm/settings/SettingsManager.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
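
A minimal sketch (hypothetical code, not the SettingsManager itself) of the
hazard the one-line change avoids: a reference bound to data whose owner is
subsequently std::move'd observes the moved-from state, so later uses read a
valid but unspecified value. Copying the name first, as the patch does, is
the robust fix.

    #include <string>
    #include <utility>
    #include <vector>

    std::vector<std::string> store;

    void registerName(std::string s) {
        std::string const& ref = s;     // refers into s
        std::string copy = s;           // independent copy (the fix)
        store.push_back(std::move(s));  // s's contents are moved away
        // 'ref' now observes a moved-from (typically empty) string,
        // while 'copy' still holds the original name.
    }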

diff --git a/src/storm/settings/SettingsManager.cpp b/src/storm/settings/SettingsManager.cpp
index 6a694d09d..da236433c 100644
--- a/src/storm/settings/SettingsManager.cpp
+++ b/src/storm/settings/SettingsManager.cpp
@@ -404,7 +404,7 @@ namespace storm {
             STORM_LOG_THROW(moduleIterator == this->modules.end(), storm::exceptions::IllegalFunctionCallException, "Unable to register module '" << moduleSettings->getModuleName() << "' because a module with the same name already exists.");
             
             // Take over the module settings object.
-            std::string const& moduleName = moduleSettings->getModuleName();
+            std::string moduleName = moduleSettings->getModuleName();
             this->moduleNames.push_back(moduleName);
             this->modules.emplace(moduleSettings->getModuleName(), std::move(moduleSettings));
             auto iterator = this->modules.find(moduleName);

From 743dc3e8b149318d315780e1f47f29fa58a19ee9 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Mon, 30 Mar 2020 09:01:05 +0200
Subject: [PATCH 04/40] Cmake: Silence some cmake warnings that recently
 appeared (part 2)

---
 resources/3rdparty/CMakeLists.txt | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/resources/3rdparty/CMakeLists.txt b/resources/3rdparty/CMakeLists.txt
index acd1fefd1..2a5f13dab 100644
--- a/resources/3rdparty/CMakeLists.txt
+++ b/resources/3rdparty/CMakeLists.txt
@@ -170,8 +170,8 @@ if(Z3_FOUND)
             set(STORM_Z3_API_USES_STANDARD_INTEGERS ON)
         endif()
 
-        add_imported_library(z3 SHARED ${Z3_LIBRARIES} ${Z3_INCLUDE_DIRS})
-        list(APPEND STORM_DEP_TARGETS z3_SHARED)
+        add_imported_library(Z3 SHARED ${Z3_LIBRARIES} ${Z3_INCLUDE_DIRS})
+        list(APPEND STORM_DEP_TARGETS Z3_SHARED)
     else()
         message(WARNING "Storm - Could not obtain Z3 version. Building of Prism/JANI models will not be supported.")
         set(Z3_FOUND FALSE)
@@ -197,13 +197,13 @@ include(${STORM_3RDPARTY_SOURCE_DIR}/include_glpk.cmake)
 #############################################################
 
 if (STORM_USE_GUROBI)
-    find_package(Gurobi QUIET REQUIRED)
+    find_package(GUROBI QUIET REQUIRED)
     set(STORM_HAVE_GUROBI ${GUROBI_FOUND})
     if (GUROBI_FOUND)
 		if (EXISTS ${GUROBI_LIBRARY})
             message (STATUS "Storm - Linking with Gurobi (${GUROBI_CXX_LIBRARY}).")
-            add_imported_library(Gurobi SHARED ${GUROBI_LIBRARY} ${GUROBI_INCLUDE_DIRS})
-            list(APPEND STORM_DEP_TARGETS Gurobi_SHARED)
+            add_imported_library(GUROBI SHARED ${GUROBI_LIBRARY} ${GUROBI_INCLUDE_DIRS})
+            list(APPEND STORM_DEP_TARGETS GUROBI_SHARED)
         else()
             # The FindGurobi.cmake script needs to be updated every now and then as the library file contains the version number...
             message(FATAL_ERROR "Gurobi Library ${GUROBI_LIBRARY} not found. If your Gurobi Version is higher then 9.0.0, please contact the Storm developers.")
@@ -499,8 +499,8 @@ list(APPEND STORM_DEP_TARGETS sylvan_STATIC)
 find_package(HWLOC QUIET REQUIRED)
 if(HWLOC_FOUND)
     message(STATUS "Storm - Linking with hwloc ${HWLOC_VERSION}.")
-    add_imported_library(hwloc STATIC ${HWLOC_LIBRARIES} "")
-    list(APPEND STORM_DEP_TARGETS hwloc_STATIC)
+    add_imported_library(HWLOC STATIC ${HWLOC_LIBRARIES} "")
+    list(APPEND STORM_DEP_TARGETS HWLOC_STATIC)
 else()
     if(${OPERATING_SYSTEM} MATCHES "Linux")
         message(FATAL_ERROR "HWLOC is required on Linux but was not found.")

From 98bb48d3c54e2d35b607c00f07036ea1caf5f9b0 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Mon, 30 Mar 2020 12:07:49 +0200
Subject: [PATCH 05/40] BeliefGrid: Adding support for rewards.

---
 src/storm-pomdp/storage/BeliefGrid.h | 33 +++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)
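
The new getBeliefActionReward computes the expected action reward under the
belief, r(b,a) = sum_s b(s) * r(s,a). A simplified, self-contained sketch
(hypothetical names; the real code resolves the local action index to a
choice via the transition matrix's row group indices):

    #include <cstdint>
    #include <map>
    #include <vector>

    // actionReward is indexed directly by state here for simplicity.
    double beliefActionReward(std::map<uint64_t, double> const& belief,
                              std::vector<double> const& actionReward) {
        double result = 0.0;
        for (auto const& entry : belief) {
            result += entry.second * actionReward[entry.first]; // b(s) * r(s,a)
        }
        return result;
    }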

diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefGrid.h
index b0661fcc5..61fe3b370 100644
--- a/src/storm-pomdp/storage/BeliefGrid.h
+++ b/src/storm-pomdp/storage/BeliefGrid.h
@@ -24,6 +24,19 @@ namespace storm {
                 // Intentionally left empty
             }
             
+            void setRewardModel(boost::optional<std::string> rewardModelName = boost::none) {
+                if (rewardModelName) {
+                    auto const& rewardModel = pomdp.getRewardModel(rewardModelName.get());
+                    pomdpActionRewardVector = rewardModel.getTotalRewardVector(pomdp.getTransitionMatrix());
+                } else {
+                    setRewardModel(pomdp.getUniqueRewardModelName());
+                }
+            }
+            
+            void unsetRewardModel() {
+                pomdpActionRewardVector.clear();
+            }
+            
             struct Triangulation {
                 std::vector<BeliefId> gridPoints;
                 std::vector<BeliefValueType> weights;
@@ -89,7 +102,11 @@ namespace storm {
                 BeliefValueType sum = storm::utility::zero<ValueType>();
                 boost::optional<uint32_t> observation;
                 for (auto const& entry : belief) {
-                    uintmax_t entryObservation = pomdp.getObservation(entry.first);
+                    if (entry.first >= pomdp.getNumberOfStates()) {
+                        STORM_LOG_ERROR("Belief refers to the non-existing pomdp state " << entry.first << ".");
+                        return false;
+                    }
+                    uint64_t entryObservation = pomdp.getObservation(entry.first);
                     if (observation) {
                         if (observation.get() != entryObservation) {
                             STORM_LOG_ERROR("Beliefsupport contains different observations.");
@@ -176,6 +193,19 @@ namespace storm {
                 return getOrAddGridPointId(belief);
             }
             
+            ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const {
+                STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified.");
+                auto result = storm::utility::zero<ValueType>();
+                auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
+                for (auto const &entry : belief) {
+                    uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex;
+                    STORM_LOG_ASSERT(choiceIndex < choiceIndices[entry.first + 1], "Invalid local action index.");
+                    STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index.");
+                    result += entry.second * pomdpActionRewardVector[choiceIndex];
+                }
+                return result;
+            }
+            
             uint32_t getBeliefObservation(BeliefType belief) {
                 STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief.");
                 return pomdp.getObservation(belief.begin()->first);
@@ -343,6 +373,7 @@ namespace storm {
             }
             
             PomdpType const& pomdp;
+            std::vector<ValueType> pomdpActionRewardVector;
             
             std::vector<BeliefType> gridPoints;
             std::map<BeliefType, BeliefId> gridPointToIdMap;

From a3e92d2f72952e759effe1cf765f7d7cad87a2a9 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Mon, 30 Mar 2020 12:18:06 +0200
Subject: [PATCH 06/40] Using the new reward functionality of BeliefGrid. This
 also fixes incorrectly set rewards (previously, the same reward was assigned
 to all states with the same observation).

---
 .../ApproximatePOMDPModelchecker.cpp          | 71 ++++++++++---------
 1 file changed, 39 insertions(+), 32 deletions(-)
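
The new createStandardProperty helper assembles the four standard queries
from two flags; the helper then parses the string into a formula. A sketch
of just the string assembly, mirroring the patch:

    #include <string>

    // "P"/"R" selects probability vs. reward, "min"/"max" the optimization
    // direction; the result is e.g. "Rmin=? [F \"target\"]".
    std::string standardPropertyString(bool min, bool computeRewards) {
        std::string prop = computeRewards ? "R" : "P";
        prop += min ? "min" : "max";
        prop += "=? [F \"target\"]";
        return prop;
    }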

diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index fb5264ca8..3aeb81d18 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -137,7 +137,28 @@ namespace storm {
 
                 stream << "##########################################" << std::endl;
             }
-
+            
+            std::shared_ptr<storm::logic::Formula const> createStandardProperty(bool min, bool computeRewards) {
+                std::string propertyString = computeRewards ? "R" : "P";
+                propertyString += min ? "min" : "max";
+                propertyString += "=? [F \"target\"]";
+                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
+                return storm::api::extractFormulasFromProperties(propertyVector).front();
+            }
+            
+            template<typename ValueType>
+            storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property, std::vector<ValueType>&& hintVector) {
+                // Note: The property must not go out of scope after calling this, because the task stores it only by reference.
+                // Hence, this method takes the property by non-const reference, which also prevents passing in a temporary.
+                auto task = storm::api::createTask<ValueType>(property, false);
+                if (!hintVector.empty()) {
+                    auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
+                    hint.setResultHint(std::move(hintVector));
+                    auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
+                    task.setHint(hintPtr);
+                }
+                return task;
+            }
             
             template<typename ValueType, typename RewardModelType>
             std::unique_ptr<POMDPCheckResult<ValueType>>
@@ -360,6 +381,10 @@ namespace storm {
                 }
 
                 storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>> beliefGrid(pomdp, options.numericPrecision);
+                if (computeRewards) {
+                    beliefGrid.setRewardModel();
+                }
+                
                 bsmap_type beliefStateMap;
 
                 std::deque<uint64_t> beliefsToBeExpanded;
@@ -520,37 +545,27 @@ namespace storm {
                 storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
                 auto overApproxMdp = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
                 if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow));
+                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
                     for (auto const &iter : beliefStateMap.left) {
                         if (fullyExpandedStates.get(iter.second)) {
-                            auto currentBelief = beliefGrid.getGridPoint(iter.first);
+                            auto const& currentBelief = beliefGrid.getGridPoint(iter.first);
                             auto representativeState = currentBelief.begin()->first;
                             for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
-                                // Add the reward
                                 uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                                uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action));
-                                mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief));
+                                mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action));
                             }
                         }
                     }
                     overApproxMdp->addRewardModel("default", mdpRewardModel);
-                    overApproxMdp->restrictRewardModels(std::set<std::string>({"default"}));
                 }
                 statistics.overApproximationBuildTime.stop();
                 STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
                 overApproxMdp->printModelInformationToStream(std::cout);
 
                 auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(overApproxMdp);
-                std::string propertyString = computeRewards ? "R" : "P";
-                propertyString += min ? "min" : "max";
-                propertyString += "=? [F \"target\"]";
-                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
-                std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
-                auto task = storm::api::createTask<ValueType>(property, false);
-                auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
-                hint.setResultHint(hintVector);
-                auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
-                task.setHint(hintPtr);
+                auto property = createStandardProperty(min, computeRewards);
+                auto task = createStandardCheckTask(property, std::move(hintVector));
+
                 statistics.overApproximationCheckTime.start();
                 std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(overApproxMdp, task));
                 statistics.overApproximationCheckTime.stop();
@@ -1172,16 +1187,14 @@ namespace storm {
                 storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
                 auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
                 if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow));
+                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
                     for (auto const &iter : beliefStateMap.left) {
                         if (fullyExpandedStates.get(iter.second)) {
-                            auto currentBelief = beliefGrid.getGridPoint(iter.first);
+                            auto const& currentBelief = beliefGrid.getGridPoint(iter.first);
                             auto representativeState = currentBelief.begin()->first;
                             for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
-                                // Add the reward
                                 uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                                uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action));
-                                mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief));
+                                mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action));
                             }
                         }
                     }
@@ -1192,17 +1205,11 @@ namespace storm {
                 model->printModelInformationToStream(std::cout);
                 statistics.underApproximationBuildTime.stop();
 
-                std::string propertyString;
-                if (computeRewards) {
-                    propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
-                } else {
-                    propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
-                }
-                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
-                std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
-
+                auto property = createStandardProperty(min, computeRewards);
+                auto task = createStandardCheckTask(property, std::vector<ValueType>());
+                
                 statistics.underApproximationCheckTime.start();
-                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, false)));
+                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
                 statistics.underApproximationCheckTime.stop();
                 if (storm::utility::resources::isTerminate() && !res) {
                     return nullptr;

From 87c855531219755df5920538dca0c7755462a1b0 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Mon, 30 Mar 2020 12:20:13 +0200
Subject: [PATCH 07/40] Using the new reward functionality of BeliefGrid. This
 also fixes incorrectly set rewards (previously, the same reward was assigned
 to all states with the same observation). Added auxiliary functions for
 creating properties.

---
 src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 3aeb81d18..eac2dad1b 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -1207,8 +1207,8 @@ namespace storm {
 
                 auto property = createStandardProperty(min, computeRewards);
                 auto task = createStandardCheckTask(property, std::vector<ValueType>());
-                
                 statistics.underApproximationCheckTime.start();
+                
                 std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
                 statistics.underApproximationCheckTime.stop();
                 if (storm::utility::resources::isTerminate() && !res) {

From 0b552e68132ed8028d9eb3f27e87f41ead80edfc Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Mon, 30 Mar 2020 13:13:40 +0200
Subject: [PATCH 08/40] Renamed BeliefGrid to BeliefManager

---
 .../ApproximatePOMDPModelchecker.cpp          | 69 +++++++-------
 .../ApproximatePOMDPModelchecker.h            |  4 +-
 .../storage/{BeliefGrid.h => BeliefManager.h} | 91 ++++++++++---------
 3 files changed, 85 insertions(+), 79 deletions(-)
 rename src/storm-pomdp/storage/{BeliefGrid.h => BeliefManager.h} (87%)
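
Independent of the renaming, BeliefManager keeps one stable id per distinct
belief via the emplace idiom in getOrAddBeliefId. A self-contained sketch of
that idiom with simplified value types:

    #include <cstdint>
    #include <map>
    #include <vector>

    using Belief = std::map<uint64_t, double>;

    struct BeliefStore {
        std::vector<Belief> beliefs;
        std::map<Belief, uint64_t> beliefToIdMap;

        // emplace returns {iterator, inserted}: only a fresh insertion
        // appends the belief, so each distinct belief gets exactly one id.
        uint64_t getOrAdd(Belief const& b) {
            auto res = beliefToIdMap.emplace(b, beliefs.size());
            if (res.second) {
                beliefs.push_back(b);
            }
            return res.first->second;
        }
    };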

diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index eac2dad1b..6728535f5 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -380,9 +380,9 @@ namespace storm {
                     underMap = underApproximationMap.value();
                 }
 
-                storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>> beliefGrid(pomdp, options.numericPrecision);
+                auto beliefManager = std::make_shared<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>>(pomdp, options.numericPrecision);
                 if (computeRewards) {
-                    beliefGrid.setRewardModel();
+                    beliefManager->setRewardModel(); // TODO: get actual name
                 }
                 
                 bsmap_type beliefStateMap;
@@ -391,9 +391,7 @@ namespace storm {
 
                 statistics.overApproximationBuildTime.start();
                 // Initial belief always has belief ID 0
-                auto initialBeliefId = beliefGrid.getInitialBelief();
-                auto const& initialBelief = beliefGrid.getGridPoint(initialBeliefId);
-                auto initialObservation = beliefGrid.getBeliefObservation(initialBelief);
+                auto initialObservation = beliefManager->getBeliefObservation(beliefManager->getInitialBelief());
                 // These are the components to build the MDP from the grid
                 // Reserve states 0 and 1 as always sink/goal states
                 storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
@@ -419,14 +417,14 @@ namespace storm {
                 std::map<uint64_t, ValueType> weightedSumUnderMap;
 
                 // for the initial belief, add the triangulated initial states
-                auto triangulation = beliefGrid.triangulateBelief(initialBelief, observationResolutionVector[initialObservation]);
+                auto triangulation = beliefManager->triangulateBelief(beliefManager->getInitialBelief(), observationResolutionVector[initialObservation]);
                 uint64_t initialMdpState = nextMdpStateId;
                 ++nextMdpStateId;
                 if (triangulation.size() == 1) {
                     // The initial belief is on the grid itself
                     auto initBeliefId = triangulation.gridPoints.front();
                     if (boundMapsSet) {
-                        auto const& gridPoint = beliefGrid.getGridPoint(initBeliefId);
+                        auto const& gridPoint = beliefManager->getBelief(initBeliefId);
                         weightedSumOverMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, overMap);
                         weightedSumUnderMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, underMap);
                     }
@@ -443,7 +441,7 @@ namespace storm {
                         beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId));
                         ++nextMdpStateId;
                         if (boundMapsSet) {
-                            auto const& gridPoint = beliefGrid.getGridPoint(triangulation.gridPoints[i]);
+                            auto const& gridPoint = beliefManager->getBelief(triangulation.gridPoints[i]);
                             weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, overMap);
                             weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, underMap);
                         }
@@ -458,7 +456,7 @@ namespace storm {
                 if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
                     STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
                 }
-                storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false);
+                storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false);
                 for (auto const& belId : beliefsToBeExpanded) {
                     foundBeliefs.set(belId, true);
                 }
@@ -467,7 +465,7 @@ namespace storm {
                     beliefsToBeExpanded.pop_front();
                     
                     uint64_t currMdpState = beliefStateMap.left.at(currId);
-                    uint32_t currObservation = beliefGrid.getBeliefObservation(currId);
+                    uint32_t currObservation = beliefManager->getBeliefObservation(currId);
 
                     mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
                     
@@ -484,17 +482,17 @@ namespace storm {
                     } else {
                         fullyExpandedStates.grow(nextMdpStateId, false);
                         fullyExpandedStates.set(currMdpState, true);
-                        uint64_t someState = beliefGrid.getGridPoint(currId).begin()->first;
+                        uint64_t someState = beliefManager->getBelief(currId).begin()->first;
                         uint64_t numChoices = pomdp.getNumberOfChoices(someState);
 
                         for (uint64_t action = 0; action < numChoices; ++action) {
-                            auto successorGridPoints = beliefGrid.expandAndTriangulate(currId, action, observationResolutionVector);
+                            auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                             // Check for newly found grid points
-                            foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false);
+                            foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false);
                             for (auto const& successor : successorGridPoints) {
                                 auto successorId = successor.first;
-                                auto const& successorBelief = beliefGrid.getGridPoint(successorId);
-                                auto successorObservation = beliefGrid.getBeliefObservation(successorBelief);
+                                auto const& successorBelief = beliefManager->getBelief(successorId);
+                                auto successorObservation = beliefManager->getBeliefObservation(successorBelief);
                                 if (!foundBeliefs.get(successorId)) {
                                     foundBeliefs.set(successorId);
                                     beliefsToBeExpanded.push_back(successorId);
@@ -548,11 +546,11 @@ namespace storm {
                     storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
                     for (auto const &iter : beliefStateMap.left) {
                         if (fullyExpandedStates.get(iter.second)) {
-                            auto const& currentBelief = beliefGrid.getGridPoint(iter.first);
+                            auto const& currentBelief = beliefManager->getBelief(iter.first);
                             auto representativeState = currentBelief.begin()->first;
                             for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
                                 uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                                mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action));
+                                mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
                             }
                         }
                     }
@@ -575,16 +573,16 @@ namespace storm {
                 STORM_LOG_ASSERT(res, "Result does not exist.");
                 res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true)));
                 auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBeliefId)];
+                auto overApprox = overApproxResultMap[initialMdpState];
 
                 STORM_PRINT("Time Overapproximation: " <<  statistics.overApproximationCheckTime << " seconds." << std::endl);
                 STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
                 //auto underApprox = weightedSumUnderMap[initialBelief.id];
-                auto underApproxComponents = computeUnderapproximation(beliefGrid, targetObservations, min, computeRewards, maxUaModelSize);
+                auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize);
                 if (storm::utility::resources::isTerminate() && !underApproxComponents) {
                     // TODO: return other components needed for refinement.
                     //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
-                    return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, initialBeliefId});
+                    return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
                 }
                 
                 STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
@@ -595,7 +593,7 @@ namespace storm {
                                                         underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
                 */
                 return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
-                                                                                                         underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, initialBeliefId});
+                                                                                                         underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
 
             }
 
@@ -1075,7 +1073,7 @@ namespace storm {
 
             template<typename ValueType, typename RewardModelType>
             std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>>& beliefGrid,
+            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
                                                                                                 std::set<uint32_t> const &targetObservations, bool min,
                                                                                                 bool computeRewards, uint64_t maxModelSize) {
                 // Build the belief MDP until enough states are explored.
@@ -1100,13 +1098,12 @@ namespace storm {
                 bsmap_type beliefStateMap;
                 std::deque<uint64_t> beliefsToBeExpanded;
                 
-                auto initialBeliefId = beliefGrid.getInitialBelief();
-                beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, nextMdpStateId));
-                beliefsToBeExpanded.push_back(initialBeliefId);
+                beliefStateMap.insert(bsmap_type::value_type(beliefManager->getInitialBelief(), nextMdpStateId));
+                beliefsToBeExpanded.push_back(beliefManager->getInitialBelief());
                 ++nextMdpStateId;
 
                 // Expand the believes
-                storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false);
+                storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false);
                 for (auto const& belId : beliefsToBeExpanded) {
                     foundBeliefs.set(belId, true);
                 }
@@ -1115,8 +1112,8 @@ namespace storm {
                     beliefsToBeExpanded.pop_front();
                     
                     uint64_t currMdpState = beliefStateMap.left.at(currId);
-                    auto const& currBelief = beliefGrid.getGridPoint(currId);
-                    uint32_t currObservation = beliefGrid.getBeliefObservation(currBelief);
+                    auto const& currBelief = beliefManager->getBelief(currId);
+                    uint32_t currObservation = beliefManager->getBeliefObservation(currBelief);
                     
                     mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
                     
@@ -1147,9 +1144,9 @@ namespace storm {
                         uint64_t someState = currBelief.begin()->first;
                         uint64_t numChoices = pomdp.getNumberOfChoices(someState);
                         for (uint64_t action = 0; action < numChoices; ++action) {
-                            auto successorBeliefs = beliefGrid.expand(currId, action);
+                            auto successorBeliefs = beliefManager->expand(currId, action);
                             // Check for newly found beliefs
-                            foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false);
+                            foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false);
                             for (auto const& successor : successorBeliefs) {
                                 auto successorId = successor.first;
                                 if (!foundBeliefs.get(successorId)) {
@@ -1159,7 +1156,7 @@ namespace storm {
                                     ++nextMdpStateId;
                                 }
                                 auto successorMdpState = beliefStateMap.left.at(successorId);
-                                // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted.
+                                // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorBeliefs are sorted.
                                 mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
                             }
                             ++mdpMatrixRow;
@@ -1179,7 +1176,7 @@ namespace storm {
                 storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
                 mdpLabeling.addLabel("init");
                 mdpLabeling.addLabel("target");
-                mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBeliefId));
+                mdpLabeling.addLabelToState("init", beliefStateMap.left.at(beliefManager->getInitialBelief()));
                 for (auto targetState : targetStates) {
                     mdpLabeling.addLabelToState("target", targetState);
                 }
@@ -1190,11 +1187,11 @@ namespace storm {
                     storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
                     for (auto const &iter : beliefStateMap.left) {
                         if (fullyExpandedStates.get(iter.second)) {
-                            auto const& currentBelief = beliefGrid.getGridPoint(iter.first);
+                            auto const& currentBelief = beliefManager->getBelief(iter.first);
                             auto representativeState = currentBelief.begin()->first;
                             for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
                                 uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                                mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action));
+                                mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
                             }
                         }
                     }
@@ -1207,8 +1204,8 @@ namespace storm {
 
                 auto property = createStandardProperty(min, computeRewards);
                 auto task = createStandardCheckTask(property, std::vector<ValueType>());
-                statistics.underApproximationCheckTime.start();
                 
+                statistics.underApproximationCheckTime.start();
                 std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
                 statistics.underApproximationCheckTime.stop();
                 if (storm::utility::resources::isTerminate() && !res) {
@@ -1217,7 +1214,7 @@ namespace storm {
                 STORM_LOG_ASSERT(res, "Result does not exist.");
                 res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true)));
                 auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)];
+                auto underApprox = underApproxResultMap[beliefStateMap.left.at(beliefManager->getInitialBelief())];
 
                 return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
             }
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 32daa0876..a97d36cb2 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -3,7 +3,7 @@
 #include "storm/models/sparse/Pomdp.h"
 #include "storm/utility/logging.h"
 #include "storm-pomdp/storage/Belief.h"
-#include "storm-pomdp/storage/BeliefGrid.h"
+#include "storm-pomdp/storage/BeliefManager.h"
 #include <boost/bimap.hpp>
 
 #include "storm/storage/jani/Property.h"
@@ -163,7 +163,7 @@ namespace storm {
                                                                                                              std::set<uint32_t> const &targetObservations,
                                                                                                              uint64_t initialBeliefId, bool min, bool computeReward,
                                                                                                              uint64_t maxModelSize);
-                std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>>& beliefGrid,
+                std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
                                                                                                              std::set<uint32_t> const &targetObservations, bool min, bool computeReward,
                                                                                                              uint64_t maxModelSize);
 
diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefManager.h
similarity index 87%
rename from src/storm-pomdp/storage/BeliefGrid.h
rename to src/storm-pomdp/storage/BeliefManager.h
index 61fe3b370..efc6bee2a 100644
--- a/src/storm-pomdp/storage/BeliefGrid.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@@ -11,8 +11,7 @@ namespace storm {
     namespace storage {
         
         template <typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType, typename StateType = uint64_t>
-        // TODO: Change name. This actually does not only manage grid points.
-        class BeliefGrid {
+        class BeliefManager {
         public:
             
             typedef typename PomdpType::ValueType ValueType;
@@ -20,8 +19,8 @@ namespace storm {
             typedef std::map<StateType, BeliefValueType> BeliefType;
             typedef uint64_t BeliefId;
             
-            BeliefGrid(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
-                // Intentionally left empty
+            BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
+                initialBeliefId = computeInitialBelief();
             }
             
             void setRewardModel(boost::optional<std::string> rewardModelName = boost::none) {
@@ -45,13 +44,13 @@ namespace storm {
                 }
             };
             
-            BeliefType const& getGridPoint(BeliefId const& id) const {
-                return gridPoints[id];
+            BeliefType const& getBelief(BeliefId const& id) const {
+                return beliefs[id];
             }
             
-            BeliefId getIdOfGridPoint(BeliefType const& gridPoint) const {
-                auto idIt = gridPointToIdMap.find(gridPoint);
-                STORM_LOG_THROW(idIt != gridPointToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown grid state.");
+            BeliefId getId(BeliefType const& belief) const {
+                auto idIt = beliefToIdMap.find(belief);
+                STORM_LOG_THROW(idIt != beliefToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown Belief.");
                 return idIt->second;
             }
             
@@ -75,7 +74,7 @@ namespace storm {
                 std::stringstream str;
                 str << "(\n";
                 for (uint64_t i = 0; i < t.size(); ++i) {
-                    str << "\t" << t.weights[i] << " * \t" << toString(getGridPoint(t.gridPoints[i])) << "\n";
+                    str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n";
                 }
                 str <<")\n";
                 return str.str();
@@ -161,7 +160,7 @@ namespace storm {
                         STORM_LOG_ERROR("Weight greater than one in triangulation.");
                     }
                     weightSum += triangulation.weights[i];
-                    BeliefType const& gridPoint = getGridPoint(triangulation.gridPoints[i]);
+                    BeliefType const& gridPoint = getBelief(triangulation.gridPoints[i]);
                     for (auto const& pointEntry : gridPoint) {
                         BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero<ValueType>()).first->second;
                         triangulatedValue += triangulation.weights[i] * pointEntry.second;
@@ -181,16 +180,8 @@ namespace storm {
                 return true;
             }
             
-            BeliefId getInitialBelief() {
-                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
-                                 "POMDP contains more than one initial state");
-                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
-                                 "POMDP does not contain an initial state");
-                BeliefType belief;
-                belief[*pomdp.getInitialStates().begin()] = storm::utility::one<ValueType>();
-                
-                STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief.");
-                return getOrAddGridPointId(belief);
+            BeliefId const& getInitialBelief() const {
+                return initialBeliefId;
             }
             
             ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const {
@@ -212,10 +203,10 @@ namespace storm {
             }
             
             uint32_t getBeliefObservation(BeliefId beliefId) {
-                return getBeliefObservation(getGridPoint(beliefId));
+                return getBeliefObservation(getBelief(beliefId));
             }
             
-            
+
             Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
                 //TODO this can also be simplified using the sparse vector interpretation
                 //TODO Enable chaching for this method?
@@ -286,7 +277,7 @@ namespace storm {
                         if (!cc.isZero(qsj[nrStates - 1])) {
                             gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution;
                         }
-                        result.gridPoints.push_back(getOrAddGridPointId(gridPoint));
+                        result.gridPoints.push_back(getOrAddBeliefId(gridPoint));
                     }
                 }
                 
@@ -294,6 +285,10 @@ namespace storm {
                 return result;
             }
             
+            Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) {
+                return triangulateBelief(getBelief(beliefId), resolution);
+            }
+            
             template<typename DistributionType>
             void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) {
                 auto insertionRes = distr.emplace(state, value);
@@ -302,19 +297,19 @@ namespace storm {
                 }
             }
             
-            BeliefId getNumberOfGridPointIds() const {
-                return gridPoints.size();
+            BeliefId getNumberOfBeliefIds() const {
+                return beliefs.size();
             }
             
-            std::map<BeliefId, ValueType> expandInternal(BeliefId const& gridPointId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
+            std::map<BeliefId, ValueType> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
                 std::map<BeliefId, ValueType> destinations; // The belief ids should be ordered
                 // TODO: Does this make sense? It could be better to order them afterwards because now we rely on the fact that MDP states have the same order than their associated BeliefIds
                 
-                BeliefType gridPoint = getGridPoint(gridPointId);
+                BeliefType belief = getBelief(beliefId);
                 
                 // Find the probability we go to each observation
                 BeliefType successorObs; // This is actually not a belief but has the same type
-                for (auto const& pointEntry : gridPoint) {
+                for (auto const& pointEntry : belief) {
                     uint64_t state = pointEntry.first;
                     for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
                         if (!storm::utility::isZero(pomdpTransition.getValue())) {
@@ -327,7 +322,7 @@ namespace storm {
                 // Now for each successor observation we find and potentially triangulate the successor belief
                 for (auto const& successor : successorObs) {
                     BeliefType successorBelief;
-                    for (auto const& pointEntry : gridPoint) {
+                    for (auto const& pointEntry : belief) {
                         uint64_t state = pointEntry.first;
                         for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
                             if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) {
@@ -344,7 +339,7 @@ namespace storm {
                             addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
                         }
                     } else {
-                        addToDistribution(destinations, getOrAddGridPointId(successorBelief), successor.second);
+                        addToDistribution(destinations, getOrAddBeliefId(successorBelief), successor.second);
                     }
                 }
     
@@ -352,21 +347,33 @@ namespace storm {
                 
             }
             
-            std::map<BeliefId, ValueType> expandAndTriangulate(BeliefId const& gridPointId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
-                return expandInternal(gridPointId, actionIndex, observationResolutions);
+            std::map<BeliefId, ValueType> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
+                return expandInternal(beliefId, actionIndex, observationResolutions);
             }
             
-            std::map<BeliefId, ValueType> expand(BeliefId const& gridPointId, uint64_t actionIndex) {
-                return expandInternal(gridPointId, actionIndex);
+            std::map<BeliefId, ValueType> expand(BeliefId const& beliefId, uint64_t actionIndex) {
+                return expandInternal(beliefId, actionIndex);
             }
             
         private:
             
-            BeliefId getOrAddGridPointId(BeliefType const& gridPoint) {
-                auto insertioRes = gridPointToIdMap.emplace(gridPoint, gridPoints.size());
+            BeliefId computeInitialBelief() {
+                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
+                                 "POMDP contains more than one initial state");
+                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
+                                 "POMDP does not contain an initial state");
+                BeliefType belief;
+                belief[*pomdp.getInitialStates().begin()] = storm::utility::one<ValueType>();
+                
+                STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief.");
+                return getOrAddBeliefId(belief);
+            }
+            
+            BeliefId getOrAddBeliefId(BeliefType const& belief) {
+                auto insertioRes = beliefToIdMap.emplace(belief, beliefs.size());
                 if (insertioRes.second) {
-                    // There actually was an insertion, so add the new grid state
-                    gridPoints.push_back(gridPoint);
+                    // There actually was an insertion, so add the new belief
+                    beliefs.push_back(belief);
                 }
                 // Return the id
                 return insertioRes.first->second;
@@ -375,8 +382,10 @@ namespace storm {
             PomdpType const& pomdp;
             std::vector<ValueType> pomdpActionRewardVector;
             
-            std::vector<BeliefType> gridPoints;
-            std::map<BeliefType, BeliefId> gridPointToIdMap;
+            std::vector<BeliefType> beliefs;
+            std::map<BeliefType, BeliefId> beliefToIdMap;
+            BeliefId initialBeliefId;
+            
             storm::utility::ConstantsComparator<ValueType> cc;
             
             

From 41199ea5c7b14868568deb4dbffbf20487d65294 Mon Sep 17 00:00:00 2001
From: Matthias Volk <matthias.volk@cs.rwth-aachen.de>
Date: Mon, 30 Mar 2020 14:55:21 +0200
Subject: [PATCH 09/40] Append to dot output for DDs

---
 src/storm/storage/dd/cudd/InternalCuddBdd.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
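
The behavioural difference is only the fopen mode: "w" truncates an existing
file, while "a+" opens it for reading and appending (creating it if needed),
so repeated dumps accumulate in one file. A minimal sketch:

    #include <cstdio>

    // Each call appends another graph instead of overwriting the file.
    void appendDot(char const* filename, char const* dot) {
        FILE* f = std::fopen(filename, "a+");
        if (f != nullptr) {
            std::fputs(dot, f);
            std::fclose(f);
        }
    }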

diff --git a/src/storm/storage/dd/cudd/InternalCuddBdd.cpp b/src/storm/storage/dd/cudd/InternalCuddBdd.cpp
index a850d6329..647c6b7ce 100644
--- a/src/storm/storage/dd/cudd/InternalCuddBdd.cpp
+++ b/src/storm/storage/dd/cudd/InternalCuddBdd.cpp
@@ -196,7 +196,7 @@ namespace storm {
             
             // Open the file, dump the DD and close it again.
             std::vector<cudd::BDD> cuddBddVector = { this->getCuddBdd() };
-            FILE* filePointer = fopen(filename.c_str() , "w");
+            FILE* filePointer = fopen(filename.c_str() , "a+");
             if (showVariablesIfPossible) {
                 ddManager->getCuddManager().DumpDot(cuddBddVector, ddVariableNames.data(), &ddNames[0], filePointer);
             } else {

From b8ac41f561b175c8469f85d75c6cb9148253112b Mon Sep 17 00:00:00 2001
From: Matthias Volk <matthias.volk@cs.rwth-aachen.de>
Date: Mon, 30 Mar 2020 18:11:13 +0200
Subject: [PATCH 10/40] Fixed a problem with stormpy by changing boost::optional
 arguments to const& in GSPNs

---
 src/storm-gspn/storage/gspn/GspnBuilder.cpp | 4 ++--
 src/storm-gspn/storage/gspn/GspnBuilder.h   | 4 ++--
 src/storm-gspn/storage/gspn/Place.cpp       | 2 +-
 src/storm-gspn/storage/gspn/Place.h         | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)
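
A sketch of the changed signature style (a hypothetical mirror of
Place::setCapacity, not the actual class): the optional is taken by const&,
which avoids a copy per call and, per the commit, sidesteps the binding
problem in stormpy. Callers can still pass a plain value or boost::none.

    #include <boost/optional.hpp>
    #include <cstdint>

    struct PlaceSketch {
        boost::optional<uint64_t> capacity;
        void setCapacity(boost::optional<uint64_t> const& cap) {
            capacity = cap;
        }
    };

    void demo() {
        PlaceSketch p;
        p.setCapacity(7);           // bounded place
        p.setCapacity(boost::none); // unbounded place
    }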

diff --git a/src/storm-gspn/storage/gspn/GspnBuilder.cpp b/src/storm-gspn/storage/gspn/GspnBuilder.cpp
index c39b5c0f7..41ef64f96 100644
--- a/src/storm-gspn/storage/gspn/GspnBuilder.cpp
+++ b/src/storm-gspn/storage/gspn/GspnBuilder.cpp
@@ -13,7 +13,7 @@ namespace storm {
             gspnName = name;
         }
         
-        uint_fast64_t GspnBuilder::addPlace(boost::optional<uint64_t> capacity, uint_fast64_t const& initialTokens, std::string const& name) {
+        uint_fast64_t GspnBuilder::addPlace(boost::optional<uint64_t> const& capacity, uint_fast64_t const& initialTokens, std::string const& name) {
             auto newId = places.size();
             auto place = storm::gspn::Place(newId);
             place.setCapacity(capacity);
@@ -70,7 +70,7 @@ namespace storm {
             return addTimedTransition(priority, rate, 1, name);
         }
         
-        uint_fast64_t GspnBuilder::addTimedTransition(uint_fast64_t const &priority, double const &rate, boost::optional<uint64_t> numServers, std::string const& name) {
+        uint_fast64_t GspnBuilder::addTimedTransition(uint_fast64_t const &priority, double const &rate, boost::optional<uint64_t> const& numServers, std::string const& name) {
             auto trans = storm::gspn::TimedTransition<double>();
             auto newId = GSPN::timedTransitionIdToTransitionId(timedTransitions.size());
             trans.setName(name);
diff --git a/src/storm-gspn/storage/gspn/GspnBuilder.h b/src/storm-gspn/storage/gspn/GspnBuilder.h
index 5e4db4a34..fb4cd2ad7 100644
--- a/src/storm-gspn/storage/gspn/GspnBuilder.h
+++ b/src/storm-gspn/storage/gspn/GspnBuilder.h
@@ -25,7 +25,7 @@ namespace storm {
              *                 A capacity of -1 indicates an unbounded place.
              * @param initialTokens The number of initial tokens in the place.
              */
-            uint_fast64_t addPlace(boost::optional<uint64_t> capacity = 1, uint_fast64_t const& initialTokens = 0, std::string const& name = "");
+            uint_fast64_t addPlace(boost::optional<uint64_t> const& capacity = 1, uint_fast64_t const& initialTokens = 0, std::string const& name = "");
             
             void setPlaceLayoutInfo(uint64_t placeId, LayoutInfo const& layoutInfo);
 
@@ -51,7 +51,7 @@ namespace storm {
              * @param rate The rate for the transition.
              * @param numServers The number of servers this transition has (in case of K-Server semantics) or boost::none (in case of Infinite-Server-Semantics).
              */
-            uint_fast64_t addTimedTransition(uint_fast64_t const &priority, RateType const& rate, boost::optional<uint64_t> numServers, std::string const& name = "");
+            uint_fast64_t addTimedTransition(uint_fast64_t const &priority, RateType const& rate, boost::optional<uint64_t> const& numServers, std::string const& name = "");
             
             void setTransitionLayoutInfo(uint64_t transitionId, LayoutInfo const& layoutInfo);
 
diff --git a/src/storm-gspn/storage/gspn/Place.cpp b/src/storm-gspn/storage/gspn/Place.cpp
index ca9a4a9e1..8d7347936 100644
--- a/src/storm-gspn/storage/gspn/Place.cpp
+++ b/src/storm-gspn/storage/gspn/Place.cpp
@@ -29,7 +29,7 @@ namespace storm {
             return this->numberOfInitialTokens;
         }
 
-        void Place::setCapacity(boost::optional<uint64_t> cap) {
+        void Place::setCapacity(boost::optional<uint64_t> const& cap) {
             this->capacity = cap;
         }
 
diff --git a/src/storm-gspn/storage/gspn/Place.h b/src/storm-gspn/storage/gspn/Place.h
index 55c9da6fb..b6a8b4ca1 100644
--- a/src/storm-gspn/storage/gspn/Place.h
+++ b/src/storm-gspn/storage/gspn/Place.h
@@ -56,7 +56,7 @@ namespace storm {
              * @param capacity The capacity of this place. A non-negative number represents the capacity.
              *                 boost::none indicates that the flag is not set.
              */
-            void setCapacity(boost::optional<uint64_t> capacity);
+            void setCapacity(boost::optional<uint64_t> const& capacity);
 
             /*!
              * Returns the capacity of tokens of this place.

From 37da2b4e1fae34993c05c6153735fc433ad65dfa Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 31 Mar 2020 13:58:45 +0200
Subject: [PATCH 11/40] Added a new model checker that computes trivial (but
 sound) bounds on the values of POMDP states

---
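The underlying idea: model checking the formula on the fully observable underlying MDP bounds
the POMDP value from one side (the optimum can only get worse under partial observability),
and evaluating an observation-uniform memoryless scheduler on the POMDP yields a sound bound
from the other side. A hypothetical usage sketch, assuming a sparse POMDP and a parsed formula
are already available:

    #include <cstdint>
    #include <iostream>

    #include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"

    // `pomdp` and `formula` are assumed to come from the usual storm parsing/building API.
    void printTrivialBounds(storm::models::sparse::Pomdp<double> const& pomdp, storm::logic::Formula const& formula) {
        storm::pomdp::modelchecker::TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<double>> checker(pomdp);
        auto bounds = checker.getValueBounds(formula);
        // For every state s: bounds.lower[s] <= optimal POMDP value at s <= bounds.upper[s]
        for (uint64_t s = 0; s < pomdp.getNumberOfStates(); ++s) {
            std::cout << "state " << s << ": [" << bounds.lower[s] << ", " << bounds.upper[s] << "]\n";
        }
    }
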
 .../TrivialPomdpValueBoundsModelChecker.h     | 115 ++++++++++++++++++
 src/storm/storage/Distribution.cpp            |  12 +-
 src/storm/storage/Distribution.h              |   5 +
 3 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h

diff --git a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h
new file mode 100644
index 000000000..862a82a05
--- /dev/null
+++ b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h
@@ -0,0 +1,115 @@
+#pragma once
+
+#include "storm-pomdp/analysis/FormulaInformation.h"
+
+#include "storm/api/verification.h"
+#include "storm/models/sparse/Pomdp.h"
+#include "storm/models/sparse/StandardRewardModel.h"
+#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
+#include "storm/storage/Scheduler.h"
+
+#include "storm/utility/macros.h"
+#include "storm/exceptions/UnexpectedException.h"
+#include "storm/exceptions/NotSupportedException.h"
+
+namespace storm {
+    namespace pomdp {
+        namespace modelchecker {
+            template <typename PomdpType>
+            class TrivialPomdpValueBoundsModelChecker {
+            public:
+                typedef typename PomdpType::ValueType ValueType;
+                TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) {
+                    // Intentionally left empty
+                }
+                
+                struct ValueBounds {
+                    std::vector<ValueType> lower;
+                    std::vector<ValueType> upper;
+                };
+                ValueBounds getValueBounds(storm::logic::Formula const& formula) {
+                    return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula));
+                }
+                
+                ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) {
+                    STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis.");
+                    // Compute the values on the fully observable MDP
+                    // We need an actual MDP here so that the apply scheduler method below will work.
+                    // Also, the api call in the next line will require a copy anyway.
+                    auto underlyingMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels());
+                    auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(underlyingMdp, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
+                    STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained.");
+                    STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected check result type.");
+                    std::vector<ValueType> fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
+                    
+                    // Create some positional scheduler for the POMDP
+                    storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
+                    // For each state, we heuristically find a good distribution over output actions.
+                    std::vector<ValueType> fullyObservableChoiceValues(pomdp.getNumberOfChoices());
+                    if (info.isNonNestedExpectedRewardFormula()) {
+                        std::vector<ValueType> actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix());
+                        pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues, &actionBasedRewards);
+                    } else {
+                        pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues);
+                    }
+                    auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
+                    for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) {
+                        auto obsStates = pomdp.getStatesWithObservation(obs);
+                        storm::storage::Distribution<ValueType, uint_fast64_t> choiceDistribution;
+                        for (auto const &state : obsStates) {
+                            ValueType const& stateValue = fullyObservableResult[state];
+                            assert(stateValue >= storm::utility::zero<ValueType>());
+                            for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) {
+                                ValueType const& choiceValue = fullyObservableChoiceValues[choice];
+                                assert(choiceValue >= storm::utility::zero<ValueType>());
+                                // Rate this choice by considering the relative difference between the choice value and the (optimal) state value
+                                ValueType choiceRating;
+                                if (stateValue < choiceValue) {
+                                    choiceRating = choiceValue - stateValue;
+                                    if (!storm::utility::isZero(choiceValue)) {
+                                        choiceRating /= choiceValue;
+                                    }
+                                } else {
+                                    choiceRating = stateValue - choiceValue;
+                                    if (!storm::utility::isZero(stateValue)) {
+                                        choiceRating /= stateValue;
+                                    }
+                                }
+                                assert(choiceRating <= storm::utility::one<ValueType>());
+                                assert(choiceRating >= storm::utility::zero<ValueType>());
+                                // choiceRating = 0 is a very good choice, choiceRating = 1 is a very bad choice
+                                if (choiceRating <= storm::utility::convertNumber<ValueType>(0.5)) {
+                                    choiceDistribution.addProbability(choice - choiceIndices[state], storm::utility::one<ValueType>() - choiceRating);
+                                }
+                            }
+                        }
+                        choiceDistribution.normalize();
+                        for (auto const& state : obsStates) {
+                            pomdpScheduler.setChoice(choiceDistribution, state);
+                        }
+                    }
+                    auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false);
+                    
+                    auto resultPtr2 = storm::api::verifyWithSparseEngine<ValueType>(scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
+                    STORM_LOG_THROW(resultPtr2, storm::exceptions::UnexpectedException, "No check result obtained.");
+                    STORM_LOG_THROW(resultPtr2->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected check result type.");
+                    std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr2->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
+                    
+                    // Finally prepare the result
+                    ValueBounds result;
+                    if (info.minimize()) {
+                        result.lower = std::move(fullyObservableResult);
+                        result.upper = std::move(pomdpSchedulerResult);
+                    } else {
+                        result.lower = std::move(pomdpSchedulerResult);
+                        result.upper = std::move(fullyObservableResult);
+                    }
+                    return result;
+                }
+    
+            private:
+                PomdpType const& pomdp;
+            };
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/storm/storage/Distribution.cpp b/src/storm/storage/Distribution.cpp
index f40afb402..2290c611c 100644
--- a/src/storm/storage/Distribution.cpp
+++ b/src/storm/storage/Distribution.cpp
@@ -166,7 +166,17 @@ namespace storm {
             }
         }
         
-
+        template<typename ValueType, typename StateType>
+        void Distribution<ValueType, StateType>::normalize() {
+            ValueType sum = storm::utility::zero<ValueType>();
+            for (auto const& entry: distribution) {
+                sum += entry.second;
+            }
+            for (auto& entry: distribution) {
+                entry.second /= sum;
+            }
+        }
+    
         
         template class Distribution<double>;
         template std::ostream& operator<<(std::ostream& out, Distribution<double> const& distribution);
diff --git a/src/storm/storage/Distribution.h b/src/storm/storage/Distribution.h
index d7e0bd2fb..c3ac58dcc 100644
--- a/src/storm/storage/Distribution.h
+++ b/src/storm/storage/Distribution.h
@@ -144,6 +144,11 @@ namespace storm {
              */
             ValueType getProbability(StateType const& state) const;
             
+            /*!
+             * Normalizes the distribution such that the values sum up to one.
+             */
+            void normalize();
+            
         private:
             // A list of states and the probabilities that are assigned to them.
             container_type distribution;

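A small usage sketch for the new Distribution::normalize (note that it divides each entry by
the current total mass, so the distribution is assumed to be non-empty with a positive sum):

    #include "storm/storage/Distribution.h"

    void normalizeExample() {
        storm::storage::Distribution<double> d;
        d.addProbability(0, 1.0); // raw, unnormalized weights
        d.addProbability(1, 3.0);
        d.normalize();            // entries become 0.25 and 0.75
    }
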
From ab26b6943558e711bb5a7e2c064908914ca444ff Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 31 Mar 2020 14:01:00 +0200
Subject: [PATCH 12/40] Added BeliefMdpExplorer which does most of the work
 when exploring (triangulated variants of) the BeliefMdp.

---
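Condensed from the modelchecker changes below, the intended exploration protocol is: start a
new exploration (optionally with extra target/bottom states), pop unexplored beliefs one by
one, add each belief's outgoing transitions, and finally build and check the resulting MDP.
A rough sketch for the probability case, assuming beliefManager, lower/upper bound vectors,
a resolutions vector, and an isTarget predicate exist; truncation handling is omitted:

    // Sketch only; see computeFirstRefinementStep below for the full version.
    storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<double>> explorer(beliefManager, lower, upper);
    explorer.startNewExploration(storm::utility::one<double>(), storm::utility::zero<double>());
    while (explorer.hasUnexploredState()) {
        auto currId = explorer.exploreNextState();
        if (isTarget(currId)) {
            explorer.setCurrentStateIsTarget();
            explorer.addSelfloopTransition(); // make target states absorbing
        } else {
            for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                for (auto const& successor : beliefManager->expandAndTriangulate(currId, action, resolutions)) {
                    explorer.addTransitionToBelief(action, successor.first, successor.second, false);
                }
            }
        }
    }
    explorer.finishExploration();
    explorer.computeValuesOfExploredMdp(storm::solver::OptimizationDirection::Maximize);
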
 src/storm-pomdp/builder/BeliefMdpExplorer.h   | 354 ++++++++++++++++++
 .../ApproximatePOMDPModelchecker.cpp          | 248 ++++--------
 .../ApproximatePOMDPModelchecker.h            |   9 +-
 src/storm-pomdp/storage/BeliefManager.h       |  24 +-
 4 files changed, 443 insertions(+), 192 deletions(-)
 create mode 100644 src/storm-pomdp/builder/BeliefMdpExplorer.h

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
new file mode 100644
index 000000000..107f699ae
--- /dev/null
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -0,0 +1,354 @@
+#pragma once
+
+#include <memory>
+#include <vector>
+#include <deque>
+#include <map>
+#include <boost/optional.hpp>
+
+#include "storm/api/properties.h"
+#include "storm/api/verification.h"
+
+#include "storm/storage/BitVector.h"
+#include "storm/utility/macros.h"
+#include "storm-pomdp/storage/BeliefManager.h"
+#include "storm/utility/SignalHandler.h"
+
+namespace storm {
+    namespace builder {
+        template<typename PomdpType>
+        class BeliefMdpExplorer {
+        public:
+            typedef typename PomdpType::ValueType ValueType;
+            typedef storm::storage::BeliefManager<PomdpType> BeliefManagerType;
+            typedef typename BeliefManagerType::BeliefId BeliefId;
+            typedef uint64_t MdpStateType;
+            
+            BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) {
+                // Intentionally left empty
+            }
+
+            void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
+                // Reset data from potential previous explorations
+                mdpStateToBeliefIdMap.clear();
+                beliefIdToMdpStateMap.clear();
+                beliefIdsWithMdpState.clear();
+                beliefIdsWithMdpState.grow(beliefManager->getNumberOfBeliefIds(), false);
+                lowerValueBounds.clear();
+                upperValueBounds.clear();
+                values.clear();
+                mdpTransitionsBuilder = storm::storage::SparseMatrixBuilder<ValueType>(0, 0, 0, true, true);
+                currentRowCount = 0;
+                startOfCurrentRowGroup = 0;
+                mdpActionRewards.clear();
+                exploredMdp = nullptr;
+                
+                // Add some states with special treatment (if requested)
+                if (extraBottomStateValue) {
+                    extraBottomState = getCurrentNumberOfMdpStates();
+                    mdpStateToBeliefIdMap.push_back(beliefManager->noId());
+                    insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get());
+
+                    startOfCurrentRowGroup = currentRowCount;
+                    mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
+                    mdpTransitionsBuilder.addNextValue(currentRowCount, extraBottomState.get(), storm::utility::one<ValueType>());
+                    ++currentRowCount;
+                } else {
+                    extraBottomState = boost::none;
+                }
+                if (extraTargetStateValue) {
+                    extraTargetState = getCurrentNumberOfMdpStates();
+                    mdpStateToBeliefIdMap.push_back(beliefManager->noId());
+                    insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get());
+                    
+                    startOfCurrentRowGroup = currentRowCount;
+                    mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
+                    mdpTransitionsBuilder.addNextValue(currentRowCount, extraTargetState.get(), storm::utility::one<ValueType>());
+                    ++currentRowCount;
+                    
+                    targetStates.grow(getCurrentNumberOfMdpStates(), false);
+                    targetStates.set(extraTargetState.get(), true);
+                } else {
+                    extraTargetState = boost::none;
+                }
+                
+                // Set up the initial state.
+                initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
+            }
+    
+            bool hasUnexploredState() const {
+                return !beliefIdsToExplore.empty();
+            }
+    
+            BeliefId exploreNextState() {
+                // Set up the matrix builder
+                finishCurrentRow();
+                startOfCurrentRowGroup = currentRowCount;
+                mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
+                ++currentRowCount;
+                
+                // Pop from the queue.
+                auto result = beliefIdsToExplore.front();
+                beliefIdsToExplore.pop_front();
+                return result;
+            }
+            
+            void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero<ValueType>(), ValueType const& bottomStateValue = storm::utility::zero<ValueType>()) {
+                // We first insert the entries of the current row in a separate map.
+                // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
+                
+                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                if (!storm::utility::isZero(bottomStateValue)) {
+                    STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none.");
+                    internalAddTransition(row, extraBottomState.get(), bottomStateValue);
+                }
+                if (!storm::utility::isZero(targetStateValue)) {
+                    STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none.");
+                    internalAddTransition(row, extraTargetState.get(), targetStateValue);
+                }
+            }
+            
+            void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one<ValueType>()) {
+                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                internalAddTransition(row, getCurrentMdpState(), value);
+            }
+            
+            /*!
+             * Adds the next transition to the given successor belief
+             * @param localActionIndex
+             * @param transitionTarget
+             * @param value
+             * @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition.
+             * @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true.
+             */
+            bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) {
+                // We first insert the entries of the current row in a separate map.
+                // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
+                MdpStateType column;
+                if (ignoreNewBeliefs) {
+                    column = getMdpState(transitionTarget);
+                    if (column == noState()) {
+                        return false;
+                    }
+                } else {
+                    column = getOrAddMdpState(transitionTarget);
+                }
+                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                internalAddTransition(row, column, value);
+                return true;
+            }
+            
+            void computeRewardAtCurrentState(uint64_t const& localActionIndex, ValueType extraReward = storm::utility::zero<ValueType>()) {
+                if (currentRowCount >= mdpActionRewards.size()) {
+                    mdpActionRewards.resize(currentRowCount, storm::utility::zero<ValueType>());
+                }
+                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward;
+            }
+            
+            void setCurrentStateIsTarget() {
+                targetStates.grow(getCurrentNumberOfMdpStates(), false);
+                targetStates.set(getCurrentMdpState(), true);
+            }
+            
+            void setCurrentStateIsTruncated() {
+                truncatedStates.grow(getCurrentNumberOfMdpStates(), false);
+                truncatedStates.set(getCurrentMdpState(), true);
+            }
+            
+            void finishExploration() {
+                // Create the transition matrix
+                finishCurrentRow();
+                auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates());
+                
+                // Create a standard labeling
+                storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates());
+                mdpLabeling.addLabel("init");
+                mdpLabeling.addLabelToState("init", initialMdpState);
+                targetStates.resize(getCurrentNumberOfMdpStates(), false);
+                mdpLabeling.addLabel("target", std::move(targetStates));
+                truncatedStates.resize(getCurrentNumberOfMdpStates(), false);
+                mdpLabeling.addLabel("truncated", std::move(truncatedStates));
+                
+                // Create a standard reward model (if rewards are available)
+                std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels;
+                if (!mdpActionRewards.empty()) {
+                    mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
+                    mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards)));
+                }
+                
+                storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels));
+                exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
+            }
+            
+            std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const {
+                STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet.");
+                return exploredMdp;
+            }
+            
+            MdpStateType getCurrentNumberOfMdpStates() const {
+                return mdpStateToBeliefIdMap.size();
+            }
+    
+            MdpStateType getCurrentNumberOfMdpChoices() const {
+                return currentRowCount;
+            }
+
+            ValueType getLowerValueBoundAtCurrentState() const {
+                return lowerValueBounds[getCurrentMdpState()];
+            }
+
+            ValueType getUpperValueBoundAtCurrentState() const {
+                return upperValueBounds[getCurrentMdpState()];
+            }
+
+            ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const {
+                return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds);
+            }
+
+            ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const {
+                 return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
+            }
+            
+            std::vector<ValueType> const& computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) {
+                STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored");
+                auto property = createStandardProperty(dir, exploredMdp->hasRewardModel());
+                auto task = createStandardCheckTask(property);
+                
+                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(exploredMdp, task));
+                if (res) {
+                    values = std::move(res->asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
+                } else {
+                    STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!");
+                    STORM_LOG_ERROR("No result obtained while checking.");
+                }
+                return values;
+            }
+            
+            ValueType const& getComputedValueAtInitialState() const {
+                STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored.");
+                return values[exploredMdp->getInitialStates().getNextSetIndex(0)];
+            }
+            
+        private:
+            MdpStateType noState() const {
+                return std::numeric_limits<MdpStateType>::max();
+            }
+            
+            std::shared_ptr<storm::logic::Formula const> createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) {
+                std::string propertyString = computeRewards ? "R" : "P";
+                propertyString += storm::solver::minimize(dir) ? "min" : "max";
+                propertyString += "=? [F \"target\"]";
+                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
+                return storm::api::extractFormulasFromProperties(propertyVector).front();
+            }
+            
+            storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property) {
+                // Note: The property must remain in scope after calling this because the task only stores a reference to it.
+                // Therefore, this method takes the property by (non-const) reference.
+                auto task = storm::api::createTask<ValueType>(property, false);
+                auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
+                hint.setResultHint(values);
+                auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
+                task.setHint(hintPtr);
+                return task;
+            }
+            
+            MdpStateType getCurrentMdpState() const {
+                return mdpTransitionsBuilder.getCurrentRowGroupCount() - 1;
+            }
+            
+            MdpStateType getCurrentBeliefId() const {
+                return mdpStateToBeliefIdMap[getCurrentMdpState()];
+            }
+            
+            void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) {
+                // We first insert the entries of the current row in a separate map.
+                // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
+                STORM_LOG_ASSERT(row >= currentRowCount - 1, "Trying to insert into an already completed row.");
+                if (row >= currentRowCount) {
+                    // We are going to start a new row, so insert the entries of the old one
+                    finishCurrentRow();
+                    currentRowCount = row + 1;
+                }
+                STORM_LOG_ASSERT(mdpTransitionsBuilderCurrentRowEntries.count(column) == 0, "Trying to insert multiple transitions to the same state.");
+                mdpTransitionsBuilderCurrentRowEntries[column] = value;
+            }
+            
+            void finishCurrentRow() {
+                for (auto const& entry : mdpTransitionsBuilderCurrentRowEntries) {
+                    mdpTransitionsBuilder.addNextValue(currentRowCount - 1, entry.first, entry.second);
+                }
+                mdpTransitionsBuilderCurrentRowEntries.clear();
+            }
+            
+            MdpStateType getMdpState(BeliefId const& beliefId) const {
+                if (beliefId < beliefIdsWithMdpState.size() && beliefIdsWithMdpState.get(beliefId)) {
+                    return beliefIdToMdpStateMap.at(beliefId);
+                } else {
+                    return noState();
+                }
+            }
+            
+            void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) {
+                lowerValueBounds.push_back(lowerBound);
+                upperValueBounds.push_back(upperBound);
+                // Take the middle value as a hint
+                values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType, uint64_t>(2));
+                STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have a different size than the number of available states.");
+                STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size.");
+            }
+            
+            MdpStateType getOrAddMdpState(BeliefId const& beliefId) {
+                beliefIdsWithMdpState.grow(beliefId + 1, false);
+                if (beliefIdsWithMdpState.get(beliefId)) {
+                    return beliefIdToMdpStateMap[beliefId];
+                } else {
+                    // Add a new MDP state
+                    beliefIdsWithMdpState.set(beliefId, true);
+                    MdpStateType result = getCurrentNumberOfMdpStates();
+                    assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size());
+                    mdpStateToBeliefIdMap.push_back(beliefId);
+                    beliefIdToMdpStateMap[beliefId] = result;
+                    // This new belief needs exploration
+                    beliefIdsToExplore.push_back(beliefId);
+                    
+                    insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId));
+                    return result;
+                }
+            }
+            
+            // Belief state related information
+            std::shared_ptr<BeliefManagerType> beliefManager;
+            std::vector<BeliefId> mdpStateToBeliefIdMap;
+            std::map<BeliefId, MdpStateType> beliefIdToMdpStateMap;
+            storm::storage::BitVector beliefIdsWithMdpState;
+            
+            // Exploration information
+            std::deque<uint64_t> beliefIdsToExplore;
+            storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder;
+            std::map<MdpStateType, ValueType> mdpTransitionsBuilderCurrentRowEntries;
+            std::vector<ValueType> mdpActionRewards;
+            uint64_t startOfCurrentRowGroup;
+            uint64_t currentRowCount;
+            
+            // Special states during exploration
+            boost::optional<MdpStateType> extraTargetState;
+            boost::optional<MdpStateType> extraBottomState;
+            storm::storage::BitVector targetStates;
+            storm::storage::BitVector truncatedStates;
+            MdpStateType initialMdpState;
+
+            // Final Mdp
+            std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
+            
+            // Value related information
+            std::vector<ValueType> const& pomdpLowerValueBounds;
+            std::vector<ValueType> const& pomdpUpperValueBounds;
+            std::vector<ValueType> lowerValueBounds;
+            std::vector<ValueType> upperValueBounds;
+            std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has been performed
+            
+        };
+    }
+}
\ No newline at end of file
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 6728535f5..42f1872f7 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -22,7 +22,8 @@
 #include "storm/api/properties.h"
 #include "storm/api/export.h"
 #include "storm-parsers/api/storm-parsers.h"
-
+#include "storm-pomdp/builder/BeliefMdpExplorer.h"
+#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"
 
 #include "storm/utility/macros.h"
 #include "storm/utility/SignalHandler.h"
@@ -57,6 +58,10 @@ namespace storm {
                 std::unique_ptr<POMDPCheckResult<ValueType>> result;
                 // Extract the relevant information from the formula
                 auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula);
+                
+                // Compute some initial bounds on the values for each state of the pomdp
+                auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
+                
                 if (formulaInfo.isNonNestedReachabilityProbability()) {
                     // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
                     STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
@@ -68,7 +73,7 @@ namespace storm {
                     if (options.doRefinement) {
                         result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
                     } else {
-                        result = computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+                        result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
                     }
                 } else if (formulaInfo.isNonNestedExpectedRewardFormula()) {
                     // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
@@ -78,7 +83,7 @@ namespace storm {
                     } else {
                         // FIXME: pick the non-unique reward model here
                         STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet.");
-                        result = computeReachabilityRewardOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+                        result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
                     }
                 } else {
                     STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
@@ -233,8 +238,8 @@ namespace storm {
                 uint64_t refinementCounter = 1;
                 STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl)
                 std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
-                                                                                                  initialOverApproxMap,
-                                                                                                  initialUnderApproxMap, underApproxModelSize);
+                                                                                                  {},
+                                                                                                  {}, underApproxModelSize);
                 if (res == nullptr) {
                     statistics.refinementSteps = 0;
                     return nullptr;
@@ -335,14 +340,14 @@ namespace storm {
             template<typename ValueType, typename RewardModelType>
             std::unique_ptr<POMDPCheckResult<ValueType>>
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
-                                                                                             std::vector<uint64_t> &observationResolutionVector,
                                                                                              bool computeRewards,
-                                                                                             boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-                                                                                             boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
+                                                                                             std::vector<ValueType> const& lowerPomdpValueBounds,
+                                                                                             std::vector<ValueType> const& upperPomdpValueBounds,
                                                                                              uint64_t maxUaModelSize) {
                 STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
-                auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, overApproximationMap,
-                                                         underApproximationMap, maxUaModelSize);
+                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
+                auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, lowerPomdpValueBounds,
+                                                         upperPomdpValueBounds, maxUaModelSize);
                 if (result == nullptr) {
                     return nullptr;
                 }
@@ -353,8 +358,6 @@ namespace storm {
                 }
             }
 
-
-            
             template <typename ValueType, typename BeliefType, typename SummandsType>
             ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
                 ValueType result = storm::utility::zero<ValueType>();
@@ -369,155 +372,64 @@ namespace storm {
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
                                                                                                  std::vector<uint64_t> &observationResolutionVector,
                                                                                                  bool computeRewards,
-                                                                                                 boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-                                                                                                 boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
+                                                                                                 std::vector<ValueType> const& lowerPomdpValueBounds,
+                                                                                                 std::vector<ValueType> const& upperPomdpValueBounds,
                                                                                                  uint64_t maxUaModelSize) {
-                bool boundMapsSet = overApproximationMap && underApproximationMap;
-                std::map<uint64_t, ValueType> overMap;
-                std::map<uint64_t, ValueType> underMap;
-                if (boundMapsSet) {
-                    overMap = overApproximationMap.value();
-                    underMap = underApproximationMap.value();
-                }
-
+                
                 auto beliefManager = std::make_shared<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>>(pomdp, options.numericPrecision);
                 if (computeRewards) {
                     beliefManager->setRewardModel(); // TODO: get actual name
                 }
                 
-                bsmap_type beliefStateMap;
-
-                std::deque<uint64_t> beliefsToBeExpanded;
-
                 statistics.overApproximationBuildTime.start();
-                // Initial belief always has belief ID 0
-                auto initialObservation = beliefManager->getBeliefObservation(beliefManager->getInitialBelief());
-                // These are the components to build the MDP from the grid
-                // Reserve states 0 and 1 as always sink/goal states
-                storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
-                uint64_t extraBottomState = 0;
-                uint64_t extraTargetState = computeRewards ? 0 : 1;
-                uint64_t nextMdpStateId = extraTargetState + 1;
-                uint64_t mdpMatrixRow = 0;
-                for (uint64_t state = 0; state < nextMdpStateId; ++state) {
-                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
-                    ++mdpMatrixRow;
-                }
-                // Hint vector for the MDP modelchecker (initialize with constant sink/goal values)
-                std::vector<ValueType> hintVector(nextMdpStateId, storm::utility::zero<ValueType>());
-                if (!computeRewards) {
-                    hintVector[extraTargetState] = storm::utility::one<ValueType>();
-                }
-                std::vector<uint64_t> targetStates = {extraTargetState};
-                storm::storage::BitVector fullyExpandedStates;
-
-                // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace
-                std::map<uint64_t, ValueType> weightedSumOverMap;
-                std::map<uint64_t, ValueType> weightedSumUnderMap;
-
-                // for the initial belief, add the triangulated initial states
-                auto triangulation = beliefManager->triangulateBelief(beliefManager->getInitialBelief(), observationResolutionVector[initialObservation]);
-                uint64_t initialMdpState = nextMdpStateId;
-                ++nextMdpStateId;
-                if (triangulation.size() == 1) {
-                    // The initial belief is on the grid itself
-                    auto initBeliefId = triangulation.gridPoints.front();
-                    if (boundMapsSet) {
-                        auto const& gridPoint = beliefManager->getBelief(initBeliefId);
-                        weightedSumOverMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, overMap);
-                        weightedSumUnderMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, underMap);
-                    }
-                    beliefsToBeExpanded.push_back(initBeliefId);
-                    beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState));
-                    hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one<ValueType>()
-                                                                                                                                    : storm::utility::zero<ValueType>());
+                storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+                if (computeRewards) {
+                    explorer.startNewExploration(storm::utility::zero<ValueType>());
                 } else {
-                    // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
-                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-                    for (uint64_t i = 0; i < triangulation.size(); ++i) {
-                        beliefsToBeExpanded.push_back(triangulation.gridPoints[i]);
-                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]);
-                        beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId));
-                        ++nextMdpStateId;
-                        if (boundMapsSet) {
-                            auto const& gridPoint = beliefManager->getBelief(triangulation.gridPoints[i]);
-                            weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, overMap);
-                            weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, underMap);
-                        }
-                        hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one<ValueType>()
-                                                                                                                                    : storm::utility::zero<ValueType>());
-                    }
-                    //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
-                    ++mdpMatrixRow;
+                    explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
                 }
                 
                 // Expand the beliefs to generate the grid on-the-fly
                 if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
                     STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
                 }
-                storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false);
-                for (auto const& belId : beliefsToBeExpanded) {
-                    foundBeliefs.set(belId, true);
-                }
-                while (!beliefsToBeExpanded.empty()) {
-                    uint64_t currId = beliefsToBeExpanded.front();
-                    beliefsToBeExpanded.pop_front();
+                while (explorer.hasUnexploredState()) {
+                    uint64_t currId = explorer.exploreNextState();
                     
-                    uint64_t currMdpState = beliefStateMap.left.at(currId);
                     uint32_t currObservation = beliefManager->getBeliefObservation(currId);
-
-                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-                    
                     if (targetObservations.count(currObservation) != 0) {
-                        // Make this state absorbing
-                        targetStates.push_back(currMdpState);
-                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
-                        ++mdpMatrixRow;
-                    } else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
-                        // TODO: with rewards we would have to assign the corresponding reward to this transition
-                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]);
-                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one<ValueType>() - weightedSumOverMap[currId]);
-                        ++mdpMatrixRow;
+                        explorer.setCurrentStateIsTarget();
+                        explorer.addSelfloopTransition();
                     } else {
-                        fullyExpandedStates.grow(nextMdpStateId, false);
-                        fullyExpandedStates.set(currMdpState, true);
-                        uint64_t someState = beliefManager->getBelief(currId).begin()->first;
-                        uint64_t numChoices = pomdp.getNumberOfChoices(someState);
-
-                        for (uint64_t action = 0; action < numChoices; ++action) {
+                        bool stopExploration = false;
+                        if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
+                            stopExploration = true;
+                            explorer.setCurrentStateIsTruncated();
+                        }
+                        for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
+                            ValueType truncationProbability = storm::utility::zero<ValueType>();
+                            ValueType truncationValueBound = storm::utility::zero<ValueType>();
                             auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
-                            // Check for newly found grid points
-                            foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false);
                             for (auto const& successor : successorGridPoints) {
-                                auto successorId = successor.first;
-                                auto const& successorBelief = beliefManager->getBelief(successorId);
-                                auto successorObservation = beliefManager->getBeliefObservation(successorBelief);
-                                if (!foundBeliefs.get(successorId)) {
-                                    foundBeliefs.set(successorId);
-                                    beliefsToBeExpanded.push_back(successorId);
-                                    beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
-                                    ++nextMdpStateId;
-                                    
-                                    if (boundMapsSet) {
-                                        ValueType upperBound = getWeightedSum<ValueType>(successorBelief, overMap);
-                                        ValueType lowerBound = getWeightedSum<ValueType>(successorBelief, underMap);
-                                        if (cc.isEqual(upperBound, lowerBound)) {
-                                            hintVector.push_back(lowerBound);
-                                        } else {
-                                            hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
-                                        }
-                                        weightedSumOverMap[successorId] = upperBound;
-                                        weightedSumUnderMap[successorId] = lowerBound;
-                                    } else {
-                                        hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
-                                    }
+                                bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
+                                if (!added) {
+                                    STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
+                                    // We did not explore this successor state. Get a bound on the "missing" value
+                                    truncationProbability += successor.second;
+                                    truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first));
                                 }
-                                auto successorMdpState = beliefStateMap.left.at(successorId);
-                                // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted.
-                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
                             }
-                            ++mdpMatrixRow;
+                            if (stopExploration) {
+                                if (computeRewards) {
+                                    explorer.addTransitionsToExtraStates(action, truncationProbability);
+                                } else {
+                                    explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
+                                }
+                            }
+                            if (computeRewards) {
+                                // The truncationValueBound will be added on top of the reward introduced by the current belief state.
+                                explorer.computeRewardAtCurrentState(action, truncationValueBound);
+                            }
                         }
                     }
                     if (storm::utility::resources::isTerminate()) {
@@ -525,64 +437,30 @@ namespace storm {
                         break;
                     }
                 }
-                statistics.overApproximationStates = nextMdpStateId;
-                STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
+                statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates();
                 if (storm::utility::resources::isTerminate()) {
                     statistics.overApproximationBuildTime.stop();
                     return nullptr;
                 }
-                fullyExpandedStates.resize(nextMdpStateId, false);
                 
-                storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
-                mdpLabeling.addLabel("init");
-                mdpLabeling.addLabel("target");
-                mdpLabeling.addLabelToState("init", initialMdpState);
-                for (auto targetState : targetStates) {
-                    mdpLabeling.addLabelToState("target", targetState);
-                }
-                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
-                auto overApproxMdp = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
-                if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
-                    for (auto const &iter : beliefStateMap.left) {
-                        if (fullyExpandedStates.get(iter.second)) {
-                            auto const& currentBelief = beliefManager->getBelief(iter.first);
-                            auto representativeState = currentBelief.begin()->first;
-                            for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
-                                uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                                mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
-                            }
-                        }
-                    }
-                    overApproxMdp->addRewardModel("default", mdpRewardModel);
-                }
+                explorer.finishExploration();
                 statistics.overApproximationBuildTime.stop();
                 STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
-                overApproxMdp->printModelInformationToStream(std::cout);
-
-                auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(overApproxMdp);
-                auto property = createStandardProperty(min, computeRewards);
-                auto task = createStandardCheckTask(property, std::move(hintVector));
+                explorer.getExploredMdp()->printModelInformationToStream(std::cout);
 
                 statistics.overApproximationCheckTime.start();
-                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(overApproxMdp, task));
+                explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                 statistics.overApproximationCheckTime.stop();
-                if (storm::utility::resources::isTerminate() && !res) {
-                    return nullptr;
-                }
-                STORM_LOG_ASSERT(res, "Result does not exist.");
-                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true)));
-                auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                auto overApprox = overApproxResultMap[initialMdpState];
 
                 STORM_PRINT("Time Overapproximation: " <<  statistics.overApproximationCheckTime << " seconds." << std::endl);
-                STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
+                STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
+                
                 //auto underApprox = weightedSumUnderMap[initialBelief.id];
                 auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize);
                 if (storm::utility::resources::isTerminate() && !underApproxComponents) {
                     // TODO: return other components needed for refinement.
                     //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
-                    return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
+                    //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
                 }
                 
                 STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
@@ -592,8 +470,8 @@ namespace storm {
                                                         underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
                                                         underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
                 */
-                return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
-                                                                                                         underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
+                return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {},
+                                                                                                         underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
 
             }
 
@@ -930,14 +808,14 @@ namespace storm {
             std::unique_ptr<POMDPCheckResult<ValueType>>
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min) {
                 std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-                return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
+              //  return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
             }
 
             template<typename ValueType, typename RewardModelType>
             std::unique_ptr<POMDPCheckResult<ValueType>>
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min) {
                 std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-                return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
+              //  return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
             }
             
             
@@ -1191,7 +1069,7 @@ namespace storm {
                             auto representativeState = currentBelief.begin()->first;
                             for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
                                 uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                                mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
+                                mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(iter.first, action));
                             }
                         }
                     }
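
Taken together, the explorer calls introduced above follow a fixed life cycle. The following condensed C++ sketch assembles that pattern from the calls used in this patch (the wrapper function exploreAndCheck is illustrative and not part of the patch; the loop body is elided):

    #include <cstdint>
    #include <memory>
    #include <vector>
    #include "storm-pomdp/builder/BeliefMdpExplorer.h"
    #include "storm/solver/OptimizationDirection.h"
    #include "storm/utility/constants.h"

    // Condensed life cycle of a BeliefMdpExplorer-driven approximation
    // (probability objective). See the hunks above for the full
    // transition/truncation handling inside the loop.
    template<typename ValueType>
    ValueType exploreAndCheck(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
                              std::vector<ValueType> const& lowerPomdpValueBounds,
                              std::vector<ValueType> const& upperPomdpValueBounds, bool min) {
        storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
        // Reserves the extra target (value 1) and bottom (value 0) states.
        explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
        while (explorer.hasUnexploredState()) {
            uint64_t currId = explorer.exploreNextState();
            (void) currId;  // in the real code: mark targets, add transitions, or truncate
        }
        explorer.finishExploration();  // builds the explored MDP
        explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize
                                                : storm::solver::OptimizationDirection::Maximize);
        return explorer.getComputedValueAtInitialState();
    }
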
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index a97d36cb2..6216de097 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -117,8 +117,7 @@ namespace storm {
                  */
                 std::shared_ptr<RefinementComponents<ValueType>>
                 computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
-                                           bool computeRewards, boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-                                           boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
+                                           bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
 
                 std::shared_ptr<RefinementComponents<ValueType>>
                 computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
@@ -140,10 +139,8 @@ namespace storm {
                  * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
                  */
                 std::unique_ptr<POMDPCheckResult<ValueType>>
-                computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
-                                       std::vector<uint64_t> &observationResolutionVector, bool computeRewards,
-                                       boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-                                       boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
+                computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards,
+                                       std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
 
                 /**
                  * Helper to compute an underapproximation of the reachability property.
diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h
index efc6bee2a..9cb7c039c 100644
--- a/src/storm-pomdp/storage/BeliefManager.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@@ -45,6 +45,8 @@ namespace storm {
             };
             
             BeliefType const& getBelief(BeliefId const& id) const {
+                STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existend belief.");
+                STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range.");
                 return beliefs[id];
             }
             
@@ -54,6 +56,10 @@ namespace storm {
                 return idIt->second;
             }
             
+            BeliefId noId() const {
+                return std::numeric_limits<BeliefId>::max();
+            }
+            
             std::string toString(BeliefType const& belief) const {
                 std::stringstream str;
                 str << "{ ";
@@ -180,11 +186,22 @@ namespace storm {
                 return true;
             }
             
+            template <typename SummandsType>
+            ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) {
+                ValueType result = storm::utility::zero<ValueType>();
+                for (auto const& entry : getBelief(beliefId)) {
+                    result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
+                }
+                return result;
+            }
+            
+            
             BeliefId const& getInitialBelief() const {
                 return initialBeliefId;
             }
             
-            ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const {
+            ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const {
+                auto const& belief = getBelief(beliefId);
                 STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified.");
                 auto result = storm::utility::zero<ValueType>();
                 auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
@@ -206,6 +223,11 @@ namespace storm {
                 return getBeliefObservation(getBelief(beliefId));
             }
             
+            uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
+                auto const& belief = getBelief(beliefId);
+                return pomdp.getNumberOfChoices(belief.begin()->first);
+            }
+            
 
             Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
                 //TODO this can also be simplified using the sparse vector interpretation

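The new getWeightedSum helper replaces the inline weighted-sum computation that this patch removes from the model checker. A minimal usage sketch (the function boundGapAt and its map arguments are illustrative; only getWeightedSum and BeliefId come from the patch):

    #include <cstdint>
    #include <map>

    // Gap between over- and under-approximative values at a belief, computed
    // as in the removed weightedSumOverMap/weightedSumUnderMap code.
    template<typename BeliefManagerType, typename ValueType>
    ValueType boundGapAt(BeliefManagerType& manager,
                         typename BeliefManagerType::BeliefId const& id,
                         std::map<uint64_t, ValueType> const& underMap,
                         std::map<uint64_t, ValueType> const& overMap) {
        // getWeightedSum folds summands.at(state), weighted by the belief's
        // probability for each state in the belief's support.
        ValueType lower = manager.getWeightedSum(id, underMap);
        ValueType upper = manager.getWeightedSum(id, overMap);
        return upper - lower;
    }
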
From 8b0e582ef4d56a09c53179f97be83743efeb0fa9 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 31 Mar 2020 14:16:21 +0200
Subject: [PATCH 13/40] Use the new BeliefMdpExplorer also for the
 underapproximation.

---
 .../ApproximatePOMDPModelchecker.cpp          | 168 ++++++------------
 .../ApproximatePOMDPModelchecker.h            |   2 +-
 2 files changed, 55 insertions(+), 115 deletions(-)

diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 42f1872f7..9c06c4c06 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -456,7 +456,7 @@ namespace storm {
                 STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
                 
                 //auto underApprox = weightedSumUnderMap[initialBelief.id];
-                auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize);
+                auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize, lowerPomdpValueBounds, upperPomdpValueBounds);
                 if (storm::utility::resources::isTerminate() && !underApproxComponents) {
                     // TODO: return other components needed for refinement.
                     //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
@@ -953,91 +953,62 @@ namespace storm {
             std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
                                                                                                 std::set<uint32_t> const &targetObservations, bool min,
-                                                                                                bool computeRewards, uint64_t maxModelSize) {
+                                                                                                bool computeRewards, uint64_t maxModelSize, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds) {
                 // Build the belief MDP until enough states are explored.
                 //TODO think of other ways to stop exploration besides model size
 
                 statistics.underApproximationBuildTime.start();
-
-                // Reserve states 0 and 1 as always sink/goal states
-                storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
-                uint64_t extraBottomState = 0;
-                uint64_t extraTargetState = computeRewards ? 0 : 1;
-                uint64_t nextMdpStateId = extraTargetState + 1;
-                uint64_t mdpMatrixRow = 0;
-                for (uint64_t state = 0; state < nextMdpStateId; ++state) {
-                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
-                    ++mdpMatrixRow;
+                storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+                if (computeRewards) {
+                    explorer.startNewExploration(storm::utility::zero<ValueType>());
+                } else {
+                    explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
                 }
-                std::vector<uint64_t> targetStates = {extraTargetState};
-                storm::storage::BitVector fullyExpandedStates;
                 
-                bsmap_type beliefStateMap;
-                std::deque<uint64_t> beliefsToBeExpanded;
-                
-                beliefStateMap.insert(bsmap_type::value_type(beliefManager->getInitialBelief(), nextMdpStateId));
-                beliefsToBeExpanded.push_back(beliefManager->getInitialBelief());
-                ++nextMdpStateId;
-
-                // Expand the believes
-                storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false);
-                for (auto const& belId : beliefsToBeExpanded) {
-                    foundBeliefs.set(belId, true);
+                // Expand the beliefs on the fly
+                if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
+                    STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
                 }
-                while (!beliefsToBeExpanded.empty()) {
-                    uint64_t currId = beliefsToBeExpanded.front();
-                    beliefsToBeExpanded.pop_front();
-                    
-                    uint64_t currMdpState = beliefStateMap.left.at(currId);
-                    auto const& currBelief = beliefManager->getBelief(currId);
-                    uint32_t currObservation = beliefManager->getBeliefObservation(currBelief);
-                    
-                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
+                while (explorer.hasUnexploredState()) {
+                    uint64_t currId = explorer.exploreNextState();
                     
+                    uint32_t currObservation = beliefManager->getBeliefObservation(currId);
                     if (targetObservations.count(currObservation) != 0) {
-                        // Make this state absorbing
-                        targetStates.push_back(currMdpState);
-                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
-                        ++mdpMatrixRow;
-                    } else if (currMdpState > maxModelSize) {
-                        if (min) {
-                            // Get an upper bound here
-                            if (computeRewards) {
-                                // TODO: With minimizing rewards we need an upper bound!
-                                // In other cases, this could be helpflull as well.
-                                // For now, add a selfloop to "generate" infinite reward
-                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
-                            } else {
-                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, storm::utility::one<ValueType>());
-                            }
-                        } else {
-                            mdpTransitionsBuilder.addNextValue(mdpMatrixRow, computeRewards ? extraTargetState : extraBottomState, storm::utility::one<ValueType>());
-                        }
-                        ++mdpMatrixRow;
+                        explorer.setCurrentStateIsTarget();
+                        explorer.addSelfloopTransition();
                     } else {
-                        fullyExpandedStates.grow(nextMdpStateId, false);
-                        fullyExpandedStates.set(currMdpState, true);
-                        // Iterate over all actions and add the corresponding transitions
-                        uint64_t someState = currBelief.begin()->first;
-                        uint64_t numChoices = pomdp.getNumberOfChoices(someState);
-                        for (uint64_t action = 0; action < numChoices; ++action) {
-                            auto successorBeliefs = beliefManager->expand(currId, action);
-                            // Check for newly found beliefs
-                            foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false);
-                            for (auto const& successor : successorBeliefs) {
-                                auto successorId = successor.first;
-                                if (!foundBeliefs.get(successorId)) {
-                                    foundBeliefs.set(successorId);
-                                    beliefsToBeExpanded.push_back(successorId);
-                                    beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
-                                    ++nextMdpStateId;
+                        bool stopExploration = false;
+                        if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
+                            stopExploration = true;
+                            explorer.setCurrentStateIsTruncated();
+                        } else if (explorer.getCurrentNumberOfMdpStates() >= maxModelSize) {
+                            stopExploration = true;
+                            explorer.setCurrentStateIsTruncated();
+                        }
+                        for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
+                            ValueType truncationProbability = storm::utility::zero<ValueType>();
+                            ValueType truncationValueBound = storm::utility::zero<ValueType>();
+                            auto successors = beliefManager->expand(currId, action);
+                            for (auto const& successor : successors) {
+                                bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
+                                if (!added) {
+                                    STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
+                                    // We did not explore this successor state. Get a bound on the "missing" value
+                                    truncationProbability += successor.second;
+                                    truncationValueBound += successor.second * (min ? explorer.computeUpperValueBoundAtBelief(successor.first) : explorer.computeLowerValueBoundAtBelief(successor.first));
                                 }
-                                auto successorMdpState = beliefStateMap.left.at(successorId);
-                                // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorBeliefs are sorted.
-                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
                             }
-                            ++mdpMatrixRow;
+                            if (stopExploration) {
+                                if (computeRewards) {
+                                    explorer.addTransitionsToExtraStates(action, truncationProbability);
+                                } else {
+                                    explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
+                                }
+                            }
+                            if (computeRewards) {
+                                // The truncationValueBound will be added on top of the reward introduced by the current belief state.
+                                explorer.computeRewardAtCurrentState(action, truncationValueBound);
+                            }
                         }
                     }
                     if (storm::utility::resources::isTerminate()) {
@@ -1045,56 +1016,25 @@ namespace storm {
                         break;
                     }
                 }
-                statistics.underApproximationStates = nextMdpStateId;
+                statistics.underApproximationStates = explorer.getCurrentNumberOfMdpStates();
                 if (storm::utility::resources::isTerminate()) {
                     statistics.underApproximationBuildTime.stop();
                     return nullptr;
                 }
-                fullyExpandedStates.resize(nextMdpStateId, false);
-                storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
-                mdpLabeling.addLabel("init");
-                mdpLabeling.addLabel("target");
-                mdpLabeling.addLabelToState("init", beliefStateMap.left.at(beliefManager->getInitialBelief()));
-                for (auto targetState : targetStates) {
-                    mdpLabeling.addLabelToState("target", targetState);
-                }
                 
-                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
-                auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
-                if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
-                    for (auto const &iter : beliefStateMap.left) {
-                        if (fullyExpandedStates.get(iter.second)) {
-                            auto const& currentBelief = beliefManager->getBelief(iter.first);
-                            auto representativeState = currentBelief.begin()->first;
-                            for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
-                                uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                                mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(iter.first, action));
-                            }
-                        }
-                    }
-                    model->addRewardModel("default", mdpRewardModel);
-                    model->restrictRewardModels(std::set<std::string>({"default"}));
-                }
-
-                model->printModelInformationToStream(std::cout);
+                explorer.finishExploration();
                 statistics.underApproximationBuildTime.stop();
+                STORM_PRINT("Under Approximation MDP build took " << statistics.underApproximationBuildTime << " seconds." << std::endl);
+                explorer.getExploredMdp()->printModelInformationToStream(std::cout);
 
-                auto property = createStandardProperty(min, computeRewards);
-                auto task = createStandardCheckTask(property, std::vector<ValueType>());
-                
                 statistics.underApproximationCheckTime.start();
-                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
+                explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                 statistics.underApproximationCheckTime.stop();
-                if (storm::utility::resources::isTerminate() && !res) {
-                    return nullptr;
-                }
-                STORM_LOG_ASSERT(res, "Result does not exist.");
-                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true)));
-                auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                auto underApprox = underApproxResultMap[beliefStateMap.left.at(beliefManager->getInitialBelief())];
 
-                return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
+                STORM_PRINT("Time Underapproximation: " <<  statistics.underApproximationCheckTime << " seconds." << std::endl);
+                STORM_PRINT("Under-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
+                
+                return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{explorer.getComputedValueAtInitialState(), {}, {}});
             }
             
 
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 6216de097..925bff5b5 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -162,7 +162,7 @@ namespace storm {
                                                                                                              uint64_t maxModelSize);
                 std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
                                                                                                              std::set<uint32_t> const &targetObservations, bool min, bool computeReward,
-                                                                                                             uint64_t maxModelSize);
+                                                                                                             uint64_t maxModelSize, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds);
 
                 /**
                  * Constructs the initial belief for the given POMDP

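To make the truncation bookkeeping in this patch concrete: when exploration stops at a belief, each cut-off successor with probability p and value bound b contributes p*b to the extra target state and p*(1 - b) to the extra bottom state, so the explored MDP still brackets the true value; for instance, p = 0.4 and b = 0.25 route 0.1 to the target state and 0.3 to the bottom state. A sketch under these assumptions (truncateAction is an illustrative wrapper; the calls inside mirror the patch):

    #include <cstdint>
    #include "storm/utility/constants.h"

    // Truncation handling for one action at the currently explored belief
    // (probability objective, under-approximation variant; the
    // over-approximation flips the min/max bound choice).
    template<typename ValueType, typename ExplorerType, typename BeliefManagerType>
    void truncateAction(ExplorerType& explorer, BeliefManagerType& beliefManager,
                        typename BeliefManagerType::BeliefId const& currId,
                        uint64_t action, bool min, bool stopExploration) {
        ValueType truncationProbability = storm::utility::zero<ValueType>();
        ValueType truncationValueBound = storm::utility::zero<ValueType>();
        for (auto const& successor : beliefManager.expand(currId, action)) {
            if (!explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration)) {
                // Cut-off mass p and its share p * b of the value bound.
                truncationProbability += successor.second;
                truncationValueBound += successor.second
                        * (min ? explorer.computeUpperValueBoundAtBelief(successor.first)
                               : explorer.computeLowerValueBoundAtBelief(successor.first));
            }
        }
        if (stopExploration) {
            // p * b to the extra target state, p * (1 - b) to the extra bottom state.
            explorer.addTransitionsToExtraStates(action, truncationValueBound,
                                                 truncationProbability - truncationValueBound);
        }
    }
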
From 94d08d73fb740bf38c747d74f49eeaa1310338c0 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 31 Mar 2020 14:42:38 +0200
Subject: [PATCH 14/40] Capitalized GUROBI in FindGUROBI.cmake file because it
 was not found on Linux.

---
 .../cmake/find_modules/{FindGurobi.cmake => FindGUROBI.cmake}     | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename resources/cmake/find_modules/{FindGurobi.cmake => FindGUROBI.cmake} (100%)

diff --git a/resources/cmake/find_modules/FindGurobi.cmake b/resources/cmake/find_modules/FindGUROBI.cmake
similarity index 100%
rename from resources/cmake/find_modules/FindGurobi.cmake
rename to resources/cmake/find_modules/FindGUROBI.cmake

From a80553a700df59d89709427d16ebfffe72342b0c Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 31 Mar 2020 14:43:35 +0200
Subject: [PATCH 15/40] Removed a duplicated method in StandardRewardModel
 (setStateActionRewardValue did the same as setStateActionReward)

---
 src/storm/models/sparse/StandardRewardModel.cpp | 5 -----
 src/storm/models/sparse/StandardRewardModel.h   | 9 ---------
 2 files changed, 14 deletions(-)

diff --git a/src/storm/models/sparse/StandardRewardModel.cpp b/src/storm/models/sparse/StandardRewardModel.cpp
index 159c39446..b017ae58a 100644
--- a/src/storm/models/sparse/StandardRewardModel.cpp
+++ b/src/storm/models/sparse/StandardRewardModel.cpp
@@ -349,11 +349,6 @@ namespace storm {
                 return result;
             }
 
-            template<typename ValueType>
-            void StandardRewardModel<ValueType>::setStateActionRewardValue(uint_fast64_t row, ValueType const& value) {
-                this->optionalStateActionRewardVector.get()[row] = value;
-            }
-
             template<typename ValueType>
             template<typename MatrixValueType>
             void StandardRewardModel<ValueType>::clearRewardAtState(uint_fast64_t state, storm::storage::SparseMatrix<MatrixValueType> const& transitions) {
diff --git a/src/storm/models/sparse/StandardRewardModel.h b/src/storm/models/sparse/StandardRewardModel.h
index 2bfed8143..aedd7c535 100644
--- a/src/storm/models/sparse/StandardRewardModel.h
+++ b/src/storm/models/sparse/StandardRewardModel.h
@@ -287,15 +287,6 @@ namespace storm {
                 template<typename MatrixValueType>
                 storm::storage::BitVector getChoicesWithFilter(storm::storage::SparseMatrix<MatrixValueType> const& transitionMatrix, std::function<bool(ValueType const&)> const& filter) const;
 
-                /*!
-                 * Sets the given value in the state-action reward vector at the given row. This assumes that the reward
-                 * model has state-action rewards.
-                 *
-                 * @param row The row at which to set the given value.
-                 * @param value The value to set.
-                 */
-                void setStateActionRewardValue(uint_fast64_t row, ValueType const& value);
-                
                 /*!
                  * Retrieves whether the reward model is empty, i.e. contains no state-, state-action- or transition-based
                  * rewards.

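Since setStateActionRewardValue wrote the same vector entry that setStateActionReward writes, call sites migrate one-to-one. A minimal sketch of the migration (the helper setChoiceReward is illustrative, not part of the patch):

    #include "storm/models/sparse/StandardRewardModel.h"

    template<typename ValueType>
    void setChoiceReward(storm::models::sparse::StandardRewardModel<ValueType>& rewardModel,
                         uint_fast64_t row, ValueType const& value) {
        // Before this patch: rewardModel.setStateActionRewardValue(row, value);
        // After (assumes the model has state-action rewards, as the removed
        // method did):
        rewardModel.setStateActionReward(row, value);
    }
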
From 37fa53c4d827ff78b27cc49b5d712e02361961e0 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 1 Apr 2020 12:39:02 +0200
Subject: [PATCH 16/40] Added a command-line switch to disable making a POMDP
 canonic (for PRISM compatibility)

---
 .../settings/modules/POMDPSettings.cpp        |  6 +++++
 .../settings/modules/POMDPSettings.h          |  1 +
 src/storm-pomdp-cli/storm-pomdp.cpp           | 26 +++++++++----------
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp
index 4ba3f8148..57c065f7a 100644
--- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp
+++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp
@@ -13,6 +13,7 @@ namespace storm {
         namespace modules {
             
             const std::string POMDPSettings::moduleName = "pomdp";
+            const std::string noCanonicOption = "nocanonic";
             const std::string exportAsParametricModelOption = "parametric-drn";
             const std::string gridApproximationOption = "gridapproximation";
             const std::string qualitativeReductionOption = "qualitativereduction";
@@ -31,6 +32,7 @@ namespace storm {
             const std::string checkFullyObservableOption = "check-fully-observable";
 
             POMDPSettings::POMDPSettings() : ModuleSettings(moduleName) {
+                this->addOption(storm::settings::OptionBuilder(moduleName, noCanonicOption, false, "If this is set, actions will not be ordered canonically. This may yield incorrect results.").build());
                 this->addOption(storm::settings::OptionBuilder(moduleName, exportAsParametricModelOption, false, "Export the parametric file.").addArgument(storm::settings::ArgumentBuilder::createStringArgument("filename", "The name of the file to which to write the model.").build()).build());
                 this->addOption(storm::settings::OptionBuilder(moduleName, qualitativeReductionOption, false, "Reduces the model size by performing qualitative analysis (e.g. merge states with prob. 1).").build());
                 this->addOption(storm::settings::OptionBuilder(moduleName, analyzeUniqueObservationsOption, false, "Computes the states with a unique observation").build());
@@ -47,6 +49,10 @@ namespace storm {
 
             }
 
+            bool POMDPSettings::isNoCanonicSet() const {
+                return this->getOption(noCanonicOption).getHasOptionBeenSet();
+            }
+
             bool POMDPSettings::isExportToParametricSet() const {
                 return this->getOption(exportAsParametricModelOption).getHasOptionBeenSet();
             }
diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h
index 768766536..6754ac55c 100644
--- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h
+++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h
@@ -26,6 +26,7 @@ namespace storm {
                 
                 bool isQualitativeReductionSet() const;
 
+                bool isNoCanonicSet() const;
                 bool isGridApproximationSet() const;
                 bool isAnalyzeUniqueObservationsSet() const;
                 bool isMecReductionSet() const;
diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp
index 58637c6e7..9b3026832 100644
--- a/src/storm-pomdp-cli/storm-pomdp.cpp
+++ b/src/storm-pomdp-cli/storm-pomdp.cpp
@@ -102,7 +102,7 @@ namespace storm {
                 if (pomdpSettings.isGridApproximationSet()) {
                     STORM_PRINT_AND_LOG("Applying grid approximation... ");
                     auto const& gridSettings = storm::settings::getModule<storm::settings::modules::GridApproximationSettings>();
-                    typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<ValueType>::Options options;
+                    typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<ValueType>>::Options options;
                     options.initialGridResolution = gridSettings.getGridResolution();
                     options.explorationThreshold = storm::utility::convertNumber<ValueType>(gridSettings.getExplorationThreshold());
                     options.doRefinement = gridSettings.isRefineSet();
@@ -117,20 +117,16 @@ namespace storm {
                             STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "A non-zero numeric precision was set although exact arithmetic is used. Results might be inexact.");
                         }
                     }
-                    storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<ValueType> checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<ValueType>(*pomdp, options);
-                    std::unique_ptr<storm::pomdp::modelchecker::POMDPCheckResult<ValueType>> result = checker.check(formula);
+                    storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<ValueType>> checker(*pomdp, options);
+                    auto result = checker.check(formula);
                     checker.printStatisticsToStream(std::cout);
-                    if (result) {
-                        if (storm::utility::resources::isTerminate()) {
-                            STORM_PRINT_AND_LOG("\nResult till abort: ")
-                        } else {
-                            STORM_PRINT_AND_LOG("\nResult: ")
-                        }
-                        printResult(result->underApproxValue, result->overApproxValue);
-                        STORM_PRINT_AND_LOG(std::endl);
+                    if (storm::utility::resources::isTerminate()) {
+                        STORM_PRINT_AND_LOG("\nResult till abort: ")
                     } else {
-                        STORM_PRINT_AND_LOG("\nResult: Not available." << std::endl);
+                        STORM_PRINT_AND_LOG("\nResult: ")
                     }
+                    printResult(result.lowerBound, result.upperBound);
+                    STORM_PRINT_AND_LOG(std::endl);
                     analysisPerformed = true;
                 }
                 if (pomdpSettings.isMemlessSearchSet()) {
@@ -263,8 +259,10 @@ namespace storm {
                 STORM_LOG_THROW(model->getType() == storm::models::ModelType::Pomdp && model->isSparseModel(), storm::exceptions::WrongFormatException, "Expected a POMDP in sparse representation.");
             
                 std::shared_ptr<storm::models::sparse::Pomdp<ValueType>> pomdp = model->template as<storm::models::sparse::Pomdp<ValueType>>();
-                storm::transformer::MakePOMDPCanonic<ValueType> makeCanonic(*pomdp);
-                pomdp = makeCanonic.transform();
+                if (!pomdpSettings.isNoCanonicSet()) {
+                    storm::transformer::MakePOMDPCanonic<ValueType> makeCanonic(*pomdp);
+                    pomdp = makeCanonic.transform();
+                }
                 
                 std::shared_ptr<storm::logic::Formula const> formula;
                 if (!symbolicInput.properties.empty()) {

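For reference, the gating this patch introduces, condensed into one place (maybeMakeCanonic is an illustrative wrapper; the header paths are assumptions based on the diff's file layout):

    #include <memory>
    #include "storm/models/sparse/Pomdp.h"
    #include "storm/settings/SettingsManager.h"
    #include "storm-pomdp-cli/settings/modules/POMDPSettings.h"
    #include "storm-pomdp/transformer/MakePOMDPCanonic.h"

    // Default: bring the POMDP's actions into canonic order (the previous,
    // unconditional behavior). With the nocanonic option set, the POMDP is
    // used as parsed, which may yield incorrect results.
    template<typename ValueType>
    std::shared_ptr<storm::models::sparse::Pomdp<ValueType>> maybeMakeCanonic(
            std::shared_ptr<storm::models::sparse::Pomdp<ValueType>> pomdp) {
        auto const& pomdpSettings = storm::settings::getModule<storm::settings::modules::POMDPSettings>();
        if (!pomdpSettings.isNoCanonicSet()) {
            storm::transformer::MakePOMDPCanonic<ValueType> makeCanonic(*pomdp);
            pomdp = makeCanonic.transform();
        }
        return pomdp;
    }
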
From 71e065449890913cfdf56c81400fa9d6387228ee Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 1 Apr 2020 12:39:56 +0200
Subject: [PATCH 17/40] Changed method signatures to use the new data structures.

---
 src/storm-pomdp/builder/BeliefMdpExplorer.h   |  10 +-
 .../ApproximatePOMDPModelchecker.cpp          | 765 ++++--------------
 .../ApproximatePOMDPModelchecker.h            | 107 +--
 3 files changed, 200 insertions(+), 682 deletions(-)

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index 107f699ae..33e1d1f51 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -6,6 +6,7 @@
 #include <map>
 #include <boost/optional.hpp>
 
+#include "storm-parsers/api/properties.h"
 #include "storm/api/properties.h"
 #include "storm/api/verification.h"
 
@@ -13,20 +14,25 @@
 #include "storm/utility/macros.h"
 #include "storm-pomdp/storage/BeliefManager.h"
 #include "storm/utility/SignalHandler.h"
+#include "storm/modelchecker/results/CheckResult.h"
+#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
+#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
+#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp"
 
 namespace storm {
     namespace builder {
-        template<typename PomdpType>
+        template<typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType>
         class BeliefMdpExplorer {
         public:
             typedef typename PomdpType::ValueType ValueType;
-            typedef storm::storage::BeliefManager<PomdpType> BeliefManagerType;
+            typedef storm::storage::BeliefManager<PomdpType, BeliefValueType> BeliefManagerType;
             typedef typename BeliefManagerType::BeliefId BeliefId;
             typedef uint64_t MdpStateType;
             
             BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) {
                 // Intentionally left empty
             }
+            BeliefMdpExplorer(BeliefMdpExplorer&& other) = default;
 
             void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
                 // Reset data from potential previous explorations
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 9c06c4c06..436fc3e09 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -15,13 +15,8 @@
 #include "storm/models/sparse/StandardRewardModel.h"
 #include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h"
 #include "storm/utility/vector.h"
-#include "storm/modelchecker/results/CheckResult.h"
-#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
-#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
-#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp"
 #include "storm/api/properties.h"
 #include "storm/api/export.h"
-#include "storm-parsers/api/storm-parsers.h"
 #include "storm-pomdp/builder/BeliefMdpExplorer.h"
 #include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"
 
@@ -32,8 +27,8 @@
 namespace storm {
     namespace pomdp {
         namespace modelchecker {
-            template<typename ValueType, typename RewardModelType>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::Options::Options() {
+            template<typename PomdpModelType, typename BeliefValueType>
+            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Options::Options() {
                 initialGridResolution = 10;
                 explorationThreshold = storm::utility::zero<ValueType>();
                 doRefinement = true;
@@ -41,61 +36,78 @@ namespace storm {
                 numericPrecision = storm::NumberTraits<ValueType>::IsExact ? storm::utility::zero<ValueType>() : storm::utility::convertNumber<ValueType>(1e-9);
                 cacheSubsimplices = false;
             }
-            template<typename ValueType, typename RewardModelType>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::Statistics::Statistics() :  overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) {
+            
+            template<typename PomdpModelType, typename BeliefValueType>
+            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) {
+                // Intentionally left empty
+            }
+            
+            template<typename PomdpModelType, typename BeliefValueType>
+            typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType
+            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::diff(bool relative) const {
+                ValueType diff = upperBound - lowerBound;
+                if (diff < storm::utility::zero<ValueType>()) {
+                    STORM_LOG_WARN_COND(diff >= -1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << ".");
+                    diff = storm::utility::zero<ValueType>();
+                }
+                if (relative && !storm::utility::isZero(upperBound)) {
+                    diff /= upperBound;
+                }
+                return diff;
+            }
+            
+            template<typename PomdpModelType, typename BeliefValueType>
+            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Statistics::Statistics() :  overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) {
                 // intentionally left empty;
             }
             
-            template<typename ValueType, typename RewardModelType>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp, Options options) : pomdp(pomdp), options(options) {
+            template<typename PomdpModelType, typename BeliefValueType>
+            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) {
                 cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(this->options.numericPrecision), false);
             }
 
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<POMDPCheckResult<ValueType>> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::check(storm::logic::Formula const& formula) {
+            template<typename PomdpModelType, typename BeliefValueType>
+            typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::check(storm::logic::Formula const& formula) {
                 // Reset all collected statistics
                 statistics = Statistics();
-                std::unique_ptr<POMDPCheckResult<ValueType>> result;
                 // Extract the relevant information from the formula
                 auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula);
                 
                 // Compute some initial bounds on the values for each state of the pomdp
                 auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
+                Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]);
                 
-                if (formulaInfo.isNonNestedReachabilityProbability()) {
-                    // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
-                    STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
-                    if (!formulaInfo.getSinkStates().empty()) {
-                        auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
-                        reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
-                        STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported");
-                    }
-                    if (options.doRefinement) {
-                        result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
-                    } else {
-                        result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
-                    }
-                } else if (formulaInfo.isNonNestedExpectedRewardFormula()) {
+                boost::optional<std::string> rewardModelName;
+                if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) {
                     // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
                     STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
-                    if (options.doRefinement) {
-                        result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true);
+                    if (formulaInfo.isNonNestedReachabilityProbability()) {
+                        if (!formulaInfo.getSinkStates().empty()) {
+                            auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
+                            reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
+                            STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported");
+                        }
                     } else {
-                        // FIXME: pick the non-unique reward model here
-                        STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet.");
-                        result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
+                        // Expected reward formula!
+                        rewardModelName = formulaInfo.getRewardModelName();
                     }
                 } else {
                     STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
                 }
+                
+                if (options.doRefinement) {
+                    refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
+                } else {
+                    computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
+                }
                 if (storm::utility::resources::isTerminate()) {
                     statistics.aborted = true;
                 }
                 return result;
             }
             
-            template<typename ValueType, typename RewardModelType>
-            void ApproximatePOMDPModelchecker<ValueType, RewardModelType>::printStatisticsToStream(std::ostream& stream) const {
+            template<typename PomdpModelType, typename BeliefValueType>
+            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::printStatisticsToStream(std::ostream& stream) const {
                 stream << "##### Grid Approximation Statistics ######" << std::endl;
                 stream << "# Input model: " << std::endl;
                 pomdp.printModelInformationToStream(stream);
@@ -143,114 +155,82 @@ namespace storm {
                 stream << "##########################################" << std::endl;
             }
             
-            std::shared_ptr<storm::logic::Formula const> createStandardProperty(bool min, bool computeRewards) {
-                std::string propertyString = computeRewards ? "R" : "P";
-                propertyString += min ? "min" : "max";
-                propertyString += "=? [F \"target\"]";
-                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
-                return storm::api::extractFormulasFromProperties(propertyVector).front();
-            }
-            
-            template<typename ValueType>
-            storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property, std::vector<ValueType>&& hintVector) {
-                //Note: The property should not run out of scope after calling this because the task only stores the property by reference.
-                // Therefore, this method needs the property by reference (and not const reference)
-                auto task = storm::api::createTask<ValueType>(property, false);
-                if (!hintVector.empty()) {
-                    auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
-                    hint.setResultHint(std::move(hintVector));
-                    auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
-                    task.setHint(hintPtr);
-                }
-                return task;
-            }
+
             
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<POMDPCheckResult<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards) {
-                std::srand(time(NULL));
-                // Compute easy upper and lower bounds
-                storm::utility::Stopwatch underlyingWatch(true);
-                // Compute the results on the underlying MDP as a basic overapproximation
-                storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling());
-                // TODO: Is the following really necessary
-                underlyingMdpLabeling.addLabel("__goal__");
-                std::vector<uint64_t> goalStates;
-                for (auto const &targetObs : targetObservations) {
-                    for (auto const &goalState : pomdp.getStatesWithObservation(targetObs)) {
-                        underlyingMdpLabeling.addLabelToState("__goal__", goalState);
-                    }
-                }
-                storm::models::sparse::Mdp<ValueType, RewardModelType> underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels());
-                auto underlyingModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(
-                        std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underlyingMdp));
-                std::string initPropString = computeRewards ? "R" : "P";
-                initPropString += min ? "min" : "max";
-                initPropString += "=? [F \"__goal__\"]";
-                std::vector<storm::jani::Property> propVector = storm::api::parseProperties(initPropString);
-                std::shared_ptr<storm::logic::Formula const> underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front();
-                STORM_PRINT("Underlying MDP" << std::endl)
-                if (computeRewards) {
-                    underlyingMdp.addRewardModel("std", pomdp.getUniqueRewardModel());
+            template<typename PomdpModelType, typename BeliefValueType>
+            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
+                
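+                // A positive threshold allows the exploration to truncate beliefs whose value-bound gap is already below it.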
+                if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
+                    STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
                 }
-                underlyingMdp.printModelInformationToStream(std::cout);
-                std::unique_ptr<storm::modelchecker::CheckResult> underlyingRes(
-                        storm::api::verifyWithSparseEngine<ValueType>(underlyingModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
-                STORM_LOG_ASSERT(underlyingRes, "Result not exist.");
-                underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true)));
-                auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                underlyingWatch.stop();
-
-                storm::utility::Stopwatch positionalWatch(true);
-                // we define some positional scheduler for the POMDP as a basic lower bound
-                storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
-                for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) {
-                    auto obsStates = pomdp.getStatesWithObservation(obs);
-                    // select a random action for all states with the same observation
-                    uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front());
-                    for (auto const &state : obsStates) {
-                        pomdpScheduler.setChoice(chosenAction, state);
+                
+                uint64_t underApproxSizeThreshold = 0;
+                { // Overapproximation
+                    std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
+                    auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
+                    if (rewardModelName) {
+                        manager->setRewardModel(rewardModelName);
+                    }
+                    auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager);
+                    if (approx) {
+                        STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n");
+                        approx->getExploredMdp()->printModelInformationToStream(std::cout);
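+                        // The over-approximation bounds the optimal value from the optimistic side: from below when minimizing, from above when maximizing.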
+                        ValueType& resultValue = min ? result.lowerBound : result.upperBound;
+                        resultValue = approx->getComputedValueAtInitialState();
+                        underApproxSizeThreshold = approx->getExploredMdp()->getNumberOfStates();
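+                        // The size of the explored over-approximation MDP serves as the state budget for the under-approximation below.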
+                    }
+                }
+                { // Underapproximation (uses a fresh belief manager)
+                    auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
+                    if (rewardModelName) {
+                        manager->setRewardModel(rewardModelName);
+                    }
+                    auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager);
+                    if (approx) {
+                        STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n");
+                        approx->getExploredMdp()->printModelInformationToStream(std::cout);
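+                        // Conversely, the under-approximation bounds the optimal value from the pessimistic side: from above when minimizing, from below when maximizing.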
+                        ValueType& resultValue = min ? result.upperBound : result.lowerBound;
+                        resultValue = approx->getComputedValueAtInitialState();
                     }
                 }
-                auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false);
-                if (computeRewards) {
-                    underApproxModel->restrictRewardModels({"std"});
-                }
-                STORM_PRINT("Random Positional Scheduler" << std::endl)
-                underApproxModel->printModelInformationToStream(std::cout);
-                std::unique_ptr<storm::modelchecker::CheckResult> underapproxRes(
-                        storm::api::verifyWithSparseEngine<ValueType>(underApproxModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
-                STORM_LOG_ASSERT(underapproxRes, "Result not exist.");
-                underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true)));
-                auto initialUnderApproxMap = underapproxRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                positionalWatch.stop();
-
-                STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // "
-                                                       << initialUnderApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl)
-                STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl)
-
-                // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution.
-                // This can probably be improved (i.e. resolutions for single belief states)
-                STORM_PRINT("Initial Resolution: " << options.initialGridResolution << std::endl)
+            }
+            
+            template<typename PomdpModelType, typename BeliefValueType>
+            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
+                
+                // Set up exploration data
                 std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-                std::set<uint32_t> changedObservations;
-                uint64_t underApproxModelSize = 200;
-                uint64_t refinementCounter = 1;
-                STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl)
-                std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
-                                                                                                  {},
-                                                                                                  {}, underApproxModelSize);
-                if (res == nullptr) {
-                    statistics.refinementSteps = 0;
-                    return nullptr;
+                auto beliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
+                if (rewardModelName) {
+                    beliefManager->setRewardModel(rewardModelName);
+                }
+                
+                // Overapproximation
+                auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager);
+                if (!overApproximation) {
+                    return;
                 }
-                ValueType lastMinScore = storm::utility::infinity<ValueType>();
-                while (refinementCounter < 1000 && ((!min && res->overApproxValue - res->underApproxValue > options.refinementPrecision) ||
-                                                    (min && res->underApproxValue - res->overApproxValue > options.refinementPrecision))) {
+                ValueType& overApproxValue = min ? result.lowerBound : result.upperBound;
+                overApproxValue = overApproximation->getComputedValueAtInitialState();
+                
+                // Underapproximation (TODO: use the same belief manager?)
+                uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates();
+                auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager);
+                if (!underApproximation) {
+                    return;
+                }
+                ValueType& underApproxValue = min ? result.upperBound : result.lowerBound;
+                underApproxValue = underApproximation->getComputedValueAtInitialState();
+                
+                // ValueType lastMinScore = storm::utility::infinity<ValueType>();
+                // Start refinement
+                statistics.refinementSteps = 0;
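+                // Refine until the gap between the two bounds is at most the refinement precision (or termination is requested).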
+                while (result.diff() > options.refinementPrecision) {
                     if (storm::utility::resources::isTerminate()) {
                         break;
                     }
                     // TODO the actual refinement
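+                    // A refinement step will presumably raise the resolution of the observations contributing most to the
+                    // bound gap and then re-run both approximations; the retained prototype below sketches such a scoring scheme.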
+                    /*
                     // choose which observation(s) to refine
                     std::vector<ValueType> obsAccumulator(pomdp.getNrObservations(), storm::utility::zero<ValueType>());
                     std::vector<uint64_t> beliefCount(pomdp.getNrObservations(), 0);
@@ -286,9 +266,9 @@ namespace storm {
                     }
 
 
-                    /*for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
-                        obsAccumulator[i] /= storm::utility::convertNumber<ValueType>(beliefCount[i]);
-                    }*/
+                    //for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
+                    //    obsAccumulator[i] /= storm::utility::convertNumber<ValueType>(beliefCount[i]);
+                    //}
                     changedObservations.clear();
 
                     //TODO think about some other scoring methods
@@ -301,21 +281,21 @@ namespace storm {
                         observationResolutionVector[i] = maxRes + 1;
                         changedObservations.insert(i);
                     }
-                    /*} else {
-                        lastMinScore = std::min(maxAvgDifference, lastMinScore);
-                        STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
-                        STORM_PRINT("Last Min Score: " << lastMinScore << std::endl)
-                        //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl)
-                        for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
+                    //} else {
+                    //    lastMinScore = std::min(maxAvgDifference, lastMinScore);
+                    //    STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
+                    //    STORM_PRINT("Last Min Score: " << lastMinScore << std::endl)
+                    //    //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl)
+                    //    for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
                             //STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i])
-                            if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) {
+                    //        if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) {
                                 //STORM_PRINT(" *** ")
-                                observationResolutionVector[i] += 1;
-                                changedObservations.insert(i);
-                            }
+                    //            observationResolutionVector[i] += 1;
+                    //            changedObservations.insert(i);
+                    //        }
                             //STORM_PRINT(std::endl)
-                        }
-                    }*/
+                    //    }
+                    //}
                     if (underApproxModelSize < std::numeric_limits<uint64_t>::max() - 101) {
                         underApproxModelSize += 100;
                     }
@@ -327,60 +307,13 @@ namespace storm {
                     STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) ||
                                          cc.isEqual(res->underApproxValue, res->overApproxValue),
                                          "The value for the under-approximation is larger than the value for the over-approximation.");
-                    ++refinementCounter;
-                }
-                statistics.refinementSteps = refinementCounter;
-                if (min) {
-                    return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{res->underApproxValue, res->overApproxValue});
-                } else {
-                    return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{res->overApproxValue, res->underApproxValue});
-                }
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<POMDPCheckResult<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
-                                                                                             bool computeRewards,
-                                                                                             std::vector<ValueType> const& lowerPomdpValueBounds,
-                                                                                             std::vector<ValueType> const& upperPomdpValueBounds,
-                                                                                             uint64_t maxUaModelSize) {
-                STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
-                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-                auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, lowerPomdpValueBounds,
-                                                         upperPomdpValueBounds, maxUaModelSize);
-                if (result == nullptr) {
-                    return nullptr;
-                }
-                if (min) {
-                    return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->underApproxValue, result->overApproxValue});
-                } else {
-                    return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->overApproxValue, result->underApproxValue});
+                    */
+                    ++statistics.refinementSteps.get();
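+                    // Note: as long as the refinement body above is not implemented, this loop only counts steps.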
                 }
             }
 
-            template <typename ValueType, typename BeliefType, typename SummandsType>
-            ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
-                ValueType result = storm::utility::zero<ValueType>();
-                for (auto const& entry : belief) {
-                    result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
-                }
-                return result;
-            }
-            
-            template<typename ValueType, typename RewardModelType>
-            std::shared_ptr<RefinementComponents<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
-                                                                                                 std::vector<uint64_t> &observationResolutionVector,
-                                                                                                 bool computeRewards,
-                                                                                                 std::vector<ValueType> const& lowerPomdpValueBounds,
-                                                                                                 std::vector<ValueType> const& upperPomdpValueBounds,
-                                                                                                 uint64_t maxUaModelSize) {
-                
-                auto beliefManager = std::make_shared<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>>(pomdp, options.numericPrecision);
-                if (computeRewards) {
-                    beliefManager->setRewardModel(); // TODO: get actual name
-                }
-                
+            template<typename PomdpModelType, typename BeliefValueType>
+            std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager) {
                 statistics.overApproximationBuildTime.start();
                 storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
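+                // The explorer receives per-state value bounds of the original POMDP; these induce value bounds for each belief,
+                // used, e.g., to decide where exploration may be truncated.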
                 if (computeRewards) {
@@ -390,9 +323,6 @@ namespace storm {
                 }
                 
                 // Expand the beliefs to generate the grid on-the-fly
-                if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
-                    STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
-                }
                 while (explorer.hasUnexploredState()) {
                     uint64_t currId = explorer.exploreNextState();
                     
@@ -445,39 +375,20 @@ namespace storm {
                 
                 explorer.finishExploration();
                 statistics.overApproximationBuildTime.stop();
-                STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
-                explorer.getExploredMdp()->printModelInformationToStream(std::cout);
 
                 statistics.overApproximationCheckTime.start();
                 explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                 statistics.overApproximationCheckTime.stop();
-
-                STORM_PRINT("Time Overapproximation: " <<  statistics.overApproximationCheckTime << " seconds." << std::endl);
-                STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
-                
-                //auto underApprox = weightedSumUnderMap[initialBelief.id];
-                auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize, lowerPomdpValueBounds, upperPomdpValueBounds);
-                if (storm::utility::resources::isTerminate() && !underApproxComponents) {
-                    // TODO: return other components needed for refinement.
-                    //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
-                    //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
-                }
                 
-                STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
-                /* TODO: return other components needed for refinement.
-                return std::make_unique<RefinementComponents<ValueType>>(
-                        RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
-                                                        underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
-                                                        underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
-                */
-                return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {},
-                                                                                                         underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
-
+                return std::make_shared<ExplorerType>(std::move(explorer));
             }
 
-            template<typename ValueType, typename RewardModelType>
+            template<typename PomdpModelType, typename BeliefValueType>
+            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
+            /* TODO:
+            template<typename PomdpModelType, typename BeliefValueType>
             std::shared_ptr<RefinementComponents<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
+            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
                                                                                             std::vector<uint64_t> &observationResolutionVector,
                                                                                             bool computeRewards,
                                                                                             std::shared_ptr<RefinementComponents<ValueType>> refinementComponents,
@@ -504,7 +415,7 @@ namespace storm {
 
                 uint64_t nextBeliefId = refinementComponents->beliefList.size();
                 uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates();
-                std::set<uint64_t> relevantStates;
+                std::set<uint64_t> relevantStates; // The MDP states where the observation has changed
                 for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) {
                     auto currentBelief = refinementComponents->beliefList[iter.first];
                     if (changedObservations.find(currentBelief.observation) != changedObservations.end()) {
@@ -512,7 +423,7 @@ namespace storm {
                     }
                 }
 
-                std::set<std::pair<uint64_t, uint64_t>> statesAndActionsToCheck;
+                std::set<std::pair<uint64_t, uint64_t>> statesAndActionsToCheck; // The predecessors of states where the observation has changed
                 for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) {
                     for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) {
                         for (typename storm::storage::SparseMatrix<ValueType>::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(
@@ -536,6 +447,7 @@ namespace storm {
                                                                                                                               action);
                     std::map<uint64_t, ValueType> transitionInActionBelief;
                     for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
+                        // Expand and triangulate the successor
                         uint32_t observation = iter->first;
                         uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget,
                                                                                    targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId);
@@ -803,160 +715,12 @@ namespace storm {
                                                         refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap,
                                                         underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId});
             }
-
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<POMDPCheckResult<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min) {
-                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-              //  return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<POMDPCheckResult<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min) {
-                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-              //  return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
+            */
             }
-            
-            
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
-                                                                                                std::vector<bool> &beliefIsTarget,
-                                                                                                std::set<uint32_t> const &targetObservations,
-                                                                                                uint64_t initialBeliefId, bool min,
-                                                                                                bool computeRewards, uint64_t maxModelSize) {
-                std::set<uint64_t> visitedBelieves;
-                std::deque<uint64_t> beliefsToBeExpanded;
-                bsmap_type beliefStateMap;
-                std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions = {{{{0, storm::utility::one<ValueType>()}}},
-                                                                                       {{{1, storm::utility::one<ValueType>()}}}};
-                std::vector<uint64_t> targetStates = {1};
-
-                uint64_t stateId = 2;
-                beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, stateId));
-                ++stateId;
-                uint64_t nextId = beliefList.size();
-                uint64_t counter = 0;
 
-                statistics.underApproximationBuildTime.start();
-                // Expand the believes
-                visitedBelieves.insert(initialBeliefId);
-                beliefsToBeExpanded.push_back(initialBeliefId);
-                while (!beliefsToBeExpanded.empty()) {
-                    //TODO think of other ways to stop exploration besides model size
-                    auto currentBeliefId = beliefsToBeExpanded.front();
-                    uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front());
-                    // for targets, we only consider one action with one transition
-                    if (beliefIsTarget[currentBeliefId]) {
-                        // add a self-loop to target states
-                        targetStates.push_back(beliefStateMap.left.at(currentBeliefId));
-                        transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one<ValueType>()}}});
-                    } else if (counter > maxModelSize) {
-                        transitions.push_back({{{0, storm::utility::one<ValueType>()}}});
-                    } else {
-                        // Iterate over all actions and add the corresponding transitions
-                        std::vector<std::map<uint64_t, ValueType>> actionTransitionStorage;
-                        //TODO add a way to extract the actions from the over-approx and use them here?
-                        for (uint64_t action = 0; action < numChoices; ++action) {
-                            std::map<uint64_t, ValueType> transitionsInStateWithAction;
-                            std::map<uint32_t, ValueType> observationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currentBeliefId], action);
-                            for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) {
-                                uint32_t observation = iter->first;
-                                uint64_t nextBeliefId = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId],
-                                                                                           action,
-                                                                                           observation, nextId);
-                                nextId = beliefList.size();
-                                if (visitedBelieves.insert(nextBeliefId).second) {
-                                    beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId));
-                                    ++stateId;
-                                    beliefsToBeExpanded.push_back(nextBeliefId);
-                                    ++counter;
-                                }
-                                transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second;
-                            }
-                            actionTransitionStorage.push_back(transitionsInStateWithAction);
-                        }
-                        transitions.push_back(actionTransitionStorage);
-                    }
-                    beliefsToBeExpanded.pop_front();
-                    if (storm::utility::resources::isTerminate()) {
-                        statistics.underApproximationBuildAborted = true;
-                        break;
-                    }
-                }
-                statistics.underApproximationStates = transitions.size();
-                if (storm::utility::resources::isTerminate()) {
-                    statistics.underApproximationBuildTime.stop();
-                    return nullptr;
-                }
+            template<typename PomdpModelType, typename BeliefValueType>
+            std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager) {
                 
-                storm::models::sparse::StateLabeling labeling(transitions.size());
-                labeling.addLabel("init");
-                labeling.addLabel("target");
-                labeling.addLabelToState("init", 0);
-                for (auto targetState : targetStates) {
-                    labeling.addLabelToState("target", targetState);
-                }
-
-                std::shared_ptr<storm::models::sparse::Model<ValueType, RewardModelType>> model;
-                auto transitionMatrix = buildTransitionMatrix(transitions);
-                if (transitionMatrix.getRowCount() == transitionMatrix.getRowGroupCount()) {
-                    transitionMatrix.makeRowGroupingTrivial();
-                }
-                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(transitionMatrix, labeling);
-                storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(modelComponents);
-                if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> rewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
-                    for (auto const &iter : beliefStateMap.left) {
-                        auto currentBelief = beliefList[iter.first];
-                        auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
-                        for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) {
-                            // Add the reward
-                            rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
-                                                             getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
-                                                                                  currentBelief));
-                        }
-                    }
-                    underApproxMdp.addRewardModel("std", rewardModel);
-                    underApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
-                }
-                model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underApproxMdp);
-
-                model->printModelInformationToStream(std::cout);
-                statistics.underApproximationBuildTime.stop();
-
-                std::string propertyString;
-                if (computeRewards) {
-                    propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
-                } else {
-                    propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
-                }
-                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
-                std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
-
-                statistics.underApproximationCheckTime.start();
-                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, false)));
-                statistics.underApproximationCheckTime.stop();
-                if (storm::utility::resources::isTerminate() && !res) {
-                    return nullptr;
-                }
-                STORM_LOG_ASSERT(res, "Result does not exist.");
-                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true)));
-                auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)];
-
-                return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
-                                                                                                std::set<uint32_t> const &targetObservations, bool min,
-                                                                                                bool computeRewards, uint64_t maxModelSize, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds) {
-                // Build the belief MDP until enough states are explored.
-                //TODO think of other ways to stop exploration besides model size
-
                 statistics.underApproximationBuildTime.start();
                 storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
                 if (computeRewards) {
@@ -981,7 +745,7 @@ namespace storm {
                         if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
                             stopExploration = true;
                             explorer.setCurrentStateIsTruncated();
-                        } else if (explorer.getCurrentNumberOfMdpStates() >= maxModelSize) {
+                        } else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) {
                             stopExploration = true;
                             explorer.setCurrentStateIsTruncated();
                         }
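+                        // In both cases the belief is kept as a truncated state of the MDP rather than being expanded further.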
@@ -1024,249 +788,22 @@ namespace storm {
                 
                 explorer.finishExploration();
                 statistics.underApproximationBuildTime.stop();
-                STORM_PRINT("Under Approximation MDP build took " << statistics.underApproximationBuildTime << " seconds." << std::endl);
-                explorer.getExploredMdp()->printModelInformationToStream(std::cout);
 
                 statistics.underApproximationCheckTime.start();
                 explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                 statistics.underApproximationCheckTime.stop();
 
-                STORM_PRINT("Time Underapproximation: " <<  statistics.underApproximationCheckTime << " seconds." << std::endl);
-                STORM_PRINT("Under-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
-                
-                return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{explorer.getComputedValueAtInitialState(), {}, {}});
-            }
-            
-
-            template<typename ValueType, typename RewardModelType>
-            storm::storage::SparseMatrix<ValueType>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> &transitions) {
-                uint_fast64_t currentRow = 0;
-                uint_fast64_t currentRowGroup = 0;
-                uint64_t nrColumns = transitions.size();
-                uint64_t nrRows = 0;
-                uint64_t nrEntries = 0;
-                for (auto const &actionTransitions : transitions) {
-                    for (auto const &map : actionTransitions) {
-                        nrEntries += map.size();
-                        ++nrRows;
-                    }
-                }
-                storm::storage::SparseMatrixBuilder<ValueType> smb(nrRows, nrColumns, nrEntries, true, true);
-                for (auto const &actionTransitions : transitions) {
-                    smb.newRowGroup(currentRow);
-                    for (auto const &map : actionTransitions) {
-                        for (auto const &transition : map) {
-                            smb.addNextValue(currentRow, transition.first, transition.second);
-                        }
-                        ++currentRow;
-                    }
-                    ++currentRowGroup;
-                }
-                return smb.build();
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefIdInVector(
-                    std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
-                    std::map<uint64_t, ValueType> &probabilities) {
-                // TODO This one is quite slow
-                for (auto const &belief : grid) {
-                    if (belief.observation == observation) {
-                        bool same = true;
-                        for (auto const &probEntry : belief.probabilities) {
-                            if (probabilities.find(probEntry.first) == probabilities.end()) {
-                                same = false;
-                                break;
-                            }
-                            if (!cc.isEqual(probEntry.second, probabilities[probEntry.first])) {
-                                same = false;
-                                break;
-                            }
-                        }
-                        if (same) {
-                            return belief.id;
-                        }
-                    }
-                }
-                return -1;
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            storm::pomdp::Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getInitialBelief(uint64_t id) {
-                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
-                                 "POMDP contains more than one initial state");
-                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
-                                 "POMDP does not contain an initial state");
-                std::map<uint64_t, ValueType> distribution;
-                uint32_t observation = 0;
-                for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
-                    if (pomdp.getInitialStates()[state] == 1) {
-                        distribution[state] = storm::utility::one<ValueType>();
-                        observation = pomdp.getObservation(state);
-                        break;
-                    }
-                }
-                return storm::pomdp::Belief<ValueType>{id, observation, distribution};
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            std::pair<std::vector<std::map<uint64_t, ValueType>>, std::vector<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeSubSimplexAndLambdas(
-                    std::map<uint64_t, ValueType> &probabilities, uint64_t resolution, uint64_t nrStates) {
-
-                //TODO this can also be simplified using the sparse vector interpretation
-
-                // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
-                // Variable names are based on the paper
-                std::vector<ValueType> x(nrStates);
-                std::vector<ValueType> v(nrStates);
-                std::vector<ValueType> d(nrStates);
-                auto convResolution = storm::utility::convertNumber<ValueType>(resolution);
-
-                for (size_t i = 0; i < nrStates; ++i) {
-                    for (auto const &probEntry : probabilities) {
-                        if (probEntry.first >= i) {
-                            x[i] += convResolution * probEntry.second;
-                        }
-                    }
-                    v[i] = storm::utility::floor(x[i]);
-                    d[i] = x[i] - v[i];
-                }
-
-                auto p = storm::utility::vector::getSortedIndices(d);
-
-                std::vector<std::vector<ValueType>> qs(nrStates, std::vector<ValueType>(nrStates));
-                for (size_t i = 0; i < nrStates; ++i) {
-                    if (i == 0) {
-                        for (size_t j = 0; j < nrStates; ++j) {
-                            qs[i][j] = v[j];
-                        }
-                    } else {
-                        for (size_t j = 0; j < nrStates; ++j) {
-                            if (j == p[i - 1]) {
-                                qs[i][j] = qs[i - 1][j] + storm::utility::one<ValueType>();
-                            } else {
-                                qs[i][j] = qs[i - 1][j];
-                            }
-                        }
-                    }
-                }
-                std::vector<std::map<uint64_t, ValueType>> subSimplex(nrStates);
-                for (size_t j = 0; j < nrStates; ++j) {
-                    for (size_t i = 0; i < nrStates - 1; ++i) {
-                        if (cc.isLess(storm::utility::zero<ValueType>(), qs[j][i] - qs[j][i + 1])) {
-                            subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution;
-                        }
-                    }
-
-                    if (cc.isLess(storm::utility::zero<ValueType>(), qs[j][nrStates - 1])) {
-                        subSimplex[j][nrStates - 1] = qs[j][nrStates - 1] / convResolution;
-                    }
-                }
-
-                std::vector<ValueType> lambdas(nrStates, storm::utility::zero<ValueType>());
-                auto sum = storm::utility::zero<ValueType>();
-                for (size_t i = 1; i < nrStates; ++i) {
-                    lambdas[i] = d[p[i - 1]] - d[p[i]];
-                    sum += d[p[i - 1]] - d[p[i]];
-                }
-                lambdas[0] = storm::utility::one<ValueType>() - sum;
-
-                return std::make_pair(subSimplex, lambdas);
-            }
-
-
-            template<typename ValueType, typename RewardModelType>
-            std::map<uint32_t, ValueType>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeObservationProbabilitiesAfterAction(
-                    storm::pomdp::Belief<ValueType> &belief,
-                    uint64_t actionIndex) {
-                std::map<uint32_t, ValueType> res;
-                // the id is not important here as we immediately discard the belief (very hacky, I don't like it either)
-                std::map<uint64_t, ValueType> postProbabilities;
-                for (auto const &probEntry : belief.probabilities) {
-                    uint64_t state = probEntry.first;
-                    auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
-                    for (auto const &entry : row) {
-                        if (entry.getValue() > 0) {
-                            postProbabilities[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
-                        }
-                    }
-                }
-                for (auto const &probEntry : postProbabilities) {
-                    uint32_t observation = pomdp.getObservation(probEntry.first);
-                    if (res.count(observation) == 0) {
-                        res[observation] = probEntry.second;
-                    } else {
-                        res[observation] += probEntry.second;
-                    }
-                }
-
-                return res;
+                return std::make_shared<ExplorerType>(std::move(explorer));
             }
 
-            template<typename ValueType, typename RewardModelType>
-            uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterActionAndObservation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
-                    std::vector<bool> &beliefIsTarget, std::set<uint32_t> const &targetObservations, storm::pomdp::Belief<ValueType> &belief, uint64_t actionIndex,
-                    uint32_t observation, uint64_t id) {
-                std::map<uint64_t, ValueType> distributionAfter;
-                for (auto const &probEntry : belief.probabilities) {
-                    uint64_t state = probEntry.first;
-                    auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
-                    for (auto const &entry : row) {
-                        if (pomdp.getObservation(entry.getColumn()) == observation) {
-                            distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
-                        }
-                    }
-                }
-                // We have to normalize the distribution
-                auto sum = storm::utility::zero<ValueType>();
-                for (auto const &entry : distributionAfter) {
-                    sum += entry.second;
-                }
-
-                for (auto const &entry : distributionAfter) {
-                    distributionAfter[entry.first] /= sum;
-                }
-                if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) {
-                    auto res = getBeliefIdInVector(beliefList, observation, distributionAfter);
-                    return res;
-                } else {
-                    beliefList.push_back(storm::pomdp::Belief<ValueType>{id, observation, distributionAfter});
-                    beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
-                    return id;
-                }
+            template<typename PomdpModelType, typename BeliefValueType>
+            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
+                // TODO
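+                // Presumably this will re-explore with a larger state budget; the old prototype grew its model-size threshold by 100 states per refinement step.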
             }
 
-            template<typename ValueType, typename RewardModelType>
-            ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief) {
-                auto result = storm::utility::zero<ValueType>();
-                for (auto const &probEntry : belief) {
-                    result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
-                }
-                return result;
-            }
-
-            template<typename ValueType, typename RewardModelType>
-            ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief) {
-                auto result = storm::utility::zero<ValueType>();
-                for (auto const &probEntry : belief.probabilities) {
-                    result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
-                }
-                return result;
-            }
-
-
-            template
-            class ApproximatePOMDPModelchecker<double>;
-
-#ifdef STORM_HAVE_CARL
-
-            template
-            class ApproximatePOMDPModelchecker<storm::RationalNumber>;
+            template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>;
+            template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;
 
-#endif
         }
     }
 }
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 925bff5b5..0d59ac31a 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -4,6 +4,7 @@
 #include "storm/utility/logging.h"
 #include "storm-pomdp/storage/Belief.h"
 #include "storm-pomdp/storage/BeliefManager.h"
+#include "storm-pomdp/builder/BeliefMdpExplorer.h"
 #include <boost/bimap.hpp>
 
 #include "storm/storage/jani/Property.h"
@@ -17,12 +18,6 @@ namespace storm {
         namespace modelchecker {
             typedef boost::bimap<uint64_t, uint64_t> bsmap_type;
 
-            template<class ValueType>
-            struct POMDPCheckResult {
-                ValueType overApproxValue;
-                ValueType underApproxValue;
-            };
-
             /**
              *  Struct containing information which is supposed to be persistent over multiple refinement steps
              *
@@ -49,9 +44,13 @@ namespace storm {
                 bsmap_type underApproxBeliefStateMap;
             };
 
-            template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
+            template<typename PomdpModelType, typename BeliefValueType = typename PomdpModelType::ValueType>
             class ApproximatePOMDPModelchecker {
             public:
+                typedef typename PomdpModelType::ValueType ValueType;
+                typedef typename PomdpModelType::RewardModelType RewardModelType;
+                typedef storm::storage::BeliefManager<PomdpModelType, BeliefValueType> BeliefManagerType;
+                typedef storm::builder::BeliefMdpExplorer<PomdpModelType, BeliefValueType> ExplorerType;
                 
                 struct Options {
                     Options();
@@ -63,85 +62,60 @@ namespace storm {
                     bool cacheSubsimplices; /// Enables caching of subsimplices
                 };
                 
-                ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp, Options options = Options());
+                struct Result {
+                    Result(ValueType lower, ValueType upper);
+                    ValueType lowerBound;
+                    ValueType upperBound;
+                    ValueType diff(bool relative = false) const;
+                };
+                
+                ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options = Options());
                 
-                std::unique_ptr<POMDPCheckResult<ValueType>> check(storm::logic::Formula const& formula);
+                Result check(storm::logic::Formula const& formula);
 
                 void printStatisticsToStream(std::ostream& stream) const;
                 
             private:
                 /**
-                 * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop
+                 * Helper method that handles the computation of reachability probabilities and rewards using on-the-fly state space generation for a fixed grid size
                  *
-                 * @param targetObservations the set of observations to be reached
-                 * @param min true if minimum probability is to be computed
-                 * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
+                 * @param targetObservations set of target observations
+                 * @param min true if minimum value is to be computed
+                 * @param rewardModelName optional name of the reward model; if given, rewards are computed instead of probabilities
+                 * @param lowerPomdpValueBounds lower bounds on the values of the original POMDP states
+                 * @param upperPomdpValueBounds upper bounds on the values of the original POMDP states
+                 * @param result in/out struct that is updated with the computed lower and upper bounds
                  */
-                std::unique_ptr<POMDPCheckResult<ValueType>>
-                refineReachability(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards);
-
+                void computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result);
+                
+                
                 /**
-                 * Compute the reachability probability of given target observations on a POMDP for the given resolution only.
-                 * On-the-fly state space generation is used for the overapproximation
+                 * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop
                  *
                  * @param targetObservations the set of observations to be reached
                  * @param min true if minimum probability is to be computed
-                 * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
+                 * @param rewardModelName optional name of the reward model; if given, rewards are computed instead of probabilities
+                 * @param lowerPomdpValueBounds lower bounds on the values of the original POMDP states
+                 * @param upperPomdpValueBounds upper bounds on the values of the original POMDP states
+                 * @param result in/out struct that is repeatedly updated with the refined lower and upper bounds
                  */
-                std::unique_ptr<POMDPCheckResult<ValueType>>
-                computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min);
+                void refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result);
 
                 /**
-                 * Compute the reachability rewards for given target observations on a POMDP for the given resolution only.
-                 * On-the-fly state space generation is used for the overapproximation
-                 *
-                 * @param targetObservations the set of observations to be reached
-                 * @param min true if minimum rewards are to be computed
-                 * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
+                 * Builds and checks an MDP that over-approximates the POMDP behavior, i.e., it provides an upper bound for maximizing and a lower bound for minimizing properties
                  */
-                std::unique_ptr<POMDPCheckResult<ValueType>>
-                computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min);
+                std::shared_ptr<ExplorerType> computeOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager);
+
+                void refineOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
 
-            private:
                 /**
-                 * Helper method to compute the inital step of the refinement loop
-                 *
-                 * @param targetObservations set of target observations
-                 * @param min true if minimum value is to be computed
-                 * @param observationResolutionVector vector containing the resolution to be used for each observation
-                 * @param computeRewards true if rewards are to be computed, false if probability is computed
-                 * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
-                 * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
-                 * @param maxUaModelSize the maximum size of the underapproximation model to be generated
-                 * @return struct containing components generated during the computation to be used in later refinement iterations
+                 * Builds and checks an MDP that under-approximates the POMDP behavior, i.e., it provides a lower bound for maximizing and an upper bound for minimizing properties
                  */
-                std::shared_ptr<RefinementComponents<ValueType>>
-                computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
-                                           bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
+                std::shared_ptr<ExplorerType> computeUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager);
 
-                std::shared_ptr<RefinementComponents<ValueType>>
-                computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
-                                      bool computeRewards, std::shared_ptr<RefinementComponents<ValueType>> refinementComponents,
-                                      std::set<uint32_t> changedObservations,
-                                      boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-                                      boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
+                void refineUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
 
-                /**
-                 * Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size
-                 *
-                 * @param targetObservations set of target observations
-                 * @param min true if minimum value is to be computed
-                 * @param observationResolutionVector vector containing the resolution to be used for each observation
-                 * @param computeRewards true if rewards are to be computed, false if probability is computed
-                 * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
-                 * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
-                 * @param maxUaModelSize the maximum size of the underapproximation model to be generated
-                 * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
-                 */
-                std::unique_ptr<POMDPCheckResult<ValueType>>
-                computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards,
-                                       std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
 
+#ifdef REMOVE_THIS
                 /**
                  * Helper to compute an underapproximation of the reachability property.
                  * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states.
@@ -243,7 +217,8 @@ namespace storm {
                  */
                 ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief);
                 ValueType getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief);
-
+#endif //REMOVE_THIS
+                
                 struct Statistics {
                     Statistics();
                     boost::optional<uint64_t> refinementSteps;
@@ -262,7 +237,7 @@ namespace storm {
                 };
                 Statistics statistics;
                 
-                storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp;
+                PomdpModelType const& pomdp;
                 Options options;
                 storm::utility::ConstantsComparator<ValueType> cc;
             };

From 5388ed98e3d56a379a9350b169db59f1d0509a41 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 1 Apr 2020 12:51:32 +0200
Subject: [PATCH 18/40] BeliefMdpExplorer: Added a few asserts so that methods
 can only be called in the corresponding exploration phase

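The guard pattern is roughly the following (a minimal sketch with
illustrative names, not the actual Storm API): a status enum records the
current phase, and every public method first asserts that it is called
in a phase in which it is meaningful.

    #include <cassert>
    #include <vector>

    class PhasedBuilder {
    public:
        enum class Phase { Uninitialized, Exploring, Finished };

        void start() {
            phase = Phase::Exploring;
            data.clear();
        }
        void add(int value) {
            // Only valid while exploring
            assert(phase == Phase::Exploring);
            data.push_back(value);
        }
        void finish() {
            assert(phase == Phase::Exploring);
            phase = Phase::Finished;
        }
        std::vector<int> const& result() const {
            // Only valid once the model is finished
            assert(phase == Phase::Finished);
            return data;
        }
    private:
        Phase phase = Phase::Uninitialized;
        std::vector<int> data;
    };

This makes misuse (e.g. querying results mid-exploration) fail fast in
debug builds instead of silently returning partially built data.
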
---
 src/storm-pomdp/builder/BeliefMdpExplorer.h | 38 +++++++++++++++++++--
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index 33e1d1f51..86f49fe02 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -29,12 +29,20 @@ namespace storm {
             typedef typename BeliefManagerType::BeliefId BeliefId;
             typedef uint64_t MdpStateType;
             
-            BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) {
+            enum class Status {
+                Uninitialized,
+                Exploring,
+                ModelFinished,
+                ModelChecked
+            };
+            
+            BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds), status(Status::Uninitialized) {
                 // Intentionally left empty
             }
             BeliefMdpExplorer(BeliefMdpExplorer&& other) = default;
 
             void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
+                status = Status::Exploring;
                 // Reset data from potential previous explorations
                 mdpStateToBeliefIdMap.clear();
                 beliefIdToMdpStateMap.clear();
@@ -83,10 +91,12 @@ namespace storm {
             }
     
             bool hasUnexploredState() const {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 return !beliefIdsToExplore.empty();
             }
     
             BeliefId exploreNextState() {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 // Set up the matrix builder
                 finishCurrentRow();
                 startOfCurrentRowGroup = currentRowCount;
@@ -100,6 +110,7 @@ namespace storm {
             }
             
             void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero<ValueType>(), ValueType const& bottomStateValue = storm::utility::zero<ValueType>()) {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 // We first insert the entries of the current row in a separate map.
                 // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
                 
@@ -115,6 +126,7 @@ namespace storm {
             }
             
             void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one<ValueType>()) {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 uint64_t row = startOfCurrentRowGroup + localActionIndex;
                 internalAddTransition(row, getCurrentMdpState(), value);
             }
@@ -128,6 +140,7 @@ namespace storm {
              * @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true.
              */
             bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 // We first insert the entries of the current row in a separate map.
                 // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
                 MdpStateType column;
@@ -145,6 +158,7 @@ namespace storm {
             }
             
             void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero<ValueType>()) {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 if (currentRowCount >= mdpActionRewards.size()) {
                     mdpActionRewards.resize(currentRowCount, storm::utility::zero<ValueType>());
                 }
@@ -153,16 +167,19 @@ namespace storm {
             }
             
             void setCurrentStateIsTarget() {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 targetStates.grow(getCurrentNumberOfMdpStates(), false);
                 targetStates.set(getCurrentMdpState(), true);
             }
             
             void setCurrentStateIsTruncated() {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 truncatedStates.grow(getCurrentNumberOfMdpStates(), false);
                 truncatedStates.set(getCurrentMdpState(), true);
             }
             
             void finishExploration() {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 // Create the transition matrix
                 finishCurrentRow();
                 auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates());
@@ -185,26 +202,32 @@ namespace storm {
                 
                 storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels));
                 exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
+                status = Status::ModelFinished;
             }
             
             std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const {
+                STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status.");
                 STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet.");
                 return exploredMdp;
             }
             
             MdpStateType getCurrentNumberOfMdpStates() const {
+                STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status.");
                 return mdpStateToBeliefIdMap.size();
             }
     
             MdpStateType getCurrentNumberOfMdpChoices() const {
+                STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status.");
                 return currentRowCount;
             }
 
             ValueType getLowerValueBoundAtCurrentState() const {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 return lowerValueBounds[getCurrentMdpState()];
             }
 
             ValueType getUpperValueBoundAtCurrentState() const {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 return upperValueBounds[getCurrentMdpState()];
             }
 
@@ -216,7 +239,8 @@ namespace storm {
                  return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
             }
             
-            std::vector<ValueType> const& computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) {
+            void computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) {
+                STORM_LOG_ASSERT(status == Status::ModelFinished, "Method call is invalid in current status.");
                 STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored");
                 auto property = createStandardProperty(dir, exploredMdp->hasRewardModel());
                 auto task = createStandardCheckTask(property);
@@ -228,12 +252,18 @@ namespace storm {
                     STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!");
                     STORM_LOG_ERROR("No result obtained while checking.");
                 }
+                status = Status::ModelChecked;
+            }
+            
+            std::vector<ValueType> const& getValuesOfExploredMdp() const {
+                STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
                 return values;
             }
             
             ValueType const& getComputedValueAtInitialState() const {
+                STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
                 STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored.");
-                return values[exploredMdp->getInitialStates().getNextSetIndex(0)];
+                return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)];
             }
             
         private:
@@ -355,6 +385,8 @@ namespace storm {
             std::vector<ValueType> upperValueBounds;
             std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has been performed
             
+            // The current status of this explorer
+            Status status;
         };
     }
 }
\ No newline at end of file

From 79641ef1310d99a187fba3083fba9c3c6dddfb26 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 1 Apr 2020 15:59:31 +0200
Subject: [PATCH 19/40] Started to make the BeliefMdpExplorer more flexible,
 allowing to restart the exploration

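The key change is sketched below (an assumed simplification, not the
exact Storm code): instead of feeding a SparseMatrixBuilder strictly in
row order, each choice keeps its own ordered column->value map, so rows
can be (re-)filled in any order when the exploration is restarted, and
the final matrix is built in a single ordered pass afterwards.

    #include <cstdint>
    #include <map>
    #include <vector>

    using Value = double;
    std::vector<std::map<uint64_t, Value>> rows;  // one column->value map per choice
    std::vector<uint64_t> rowGroupIndices;        // first choice of each state (cf. exploredChoiceIndices)

    void newRowGroup() {
        rowGroupIndices.push_back(rows.size());
    }

    void addTransition(uint64_t row, uint64_t column, Value v) {
        if (row == rows.size()) {
            rows.emplace_back();  // rows can be opened lazily
        }
        rows[row][column] = v;    // std::map keeps columns sorted for the final build
    }
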
---
 src/storm-pomdp/builder/BeliefMdpExplorer.h | 224 ++++++++++++++------
 src/storm-pomdp/storage/BeliefManager.h     |   4 +-
 2 files changed, 162 insertions(+), 66 deletions(-)

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index 86f49fe02..e13e20cf3 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -11,6 +11,7 @@
 #include "storm/api/verification.h"
 
 #include "storm/storage/BitVector.h"
+#include "storm/storage/SparseMatrix.h"
 #include "storm/utility/macros.h"
 #include "storm-pomdp/storage/BeliefManager.h"
 #include "storm/utility/SignalHandler.h"
@@ -19,6 +20,7 @@
 #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
 #include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp"
 
+
 namespace storm {
     namespace builder {
         template<typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType>
@@ -46,16 +48,17 @@ namespace storm {
                 // Reset data from potential previous explorations
                 mdpStateToBeliefIdMap.clear();
                 beliefIdToMdpStateMap.clear();
-                beliefIdsWithMdpState.clear();
-                beliefIdsWithMdpState.grow(beliefManager->getNumberOfBeliefIds(), false);
+                exploredBeliefIds.clear();
+                exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false);
+                mdpStatesToExplore.clear();
                 lowerValueBounds.clear();
                 upperValueBounds.clear();
                 values.clear();
-                mdpTransitionsBuilder = storm::storage::SparseMatrixBuilder<ValueType>(0, 0, 0, true, true);
-                currentRowCount = 0;
-                startOfCurrentRowGroup = 0;
+                exploredMdpTransitions.clear();
+                exploredChoiceIndices.clear();
                 mdpActionRewards.clear();
                 exploredMdp = nullptr;
+                currentMdpState = noState();
                 
                 // Add some states with special treatment (if requested)
                 if (extraBottomStateValue) {
@@ -63,10 +66,8 @@ namespace storm {
                     mdpStateToBeliefIdMap.push_back(beliefManager->noId());
                     insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get());
 
-                    startOfCurrentRowGroup = currentRowCount;
-                    mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
-                    mdpTransitionsBuilder.addNextValue(currentRowCount, extraBottomState.get(), storm::utility::one<ValueType>());
-                    ++currentRowCount;
+                    internalAddRowGroupIndex();
+                    internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one<ValueType>());
                 } else {
                     extraBottomState = boost::none;
                 }
@@ -75,10 +76,8 @@ namespace storm {
                     mdpStateToBeliefIdMap.push_back(beliefManager->noId());
                     insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get());
                     
-                    startOfCurrentRowGroup = currentRowCount;
-                    mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
-                    mdpTransitionsBuilder.addNextValue(currentRowCount, extraTargetState.get(), storm::utility::one<ValueType>());
-                    ++currentRowCount;
+                    internalAddRowGroupIndex();
+                    internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one<ValueType>());
                     
                     targetStates.grow(getCurrentNumberOfMdpStates(), false);
                     targetStates.set(extraTargetState.get(), true);
@@ -89,24 +88,62 @@ namespace storm {
                 // Set up the initial state.
                 initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
             }
+            
+            /*!
+             * Restarts the exploration to allow re-exploring each state.
+             * After calling this, the "currently explored" MDP has the same number of states and choices as the "old" one, but the choices are still empty.
+             * This method inserts the initial state of the MDP into the exploration queue.
+             * While re-exploring, the reference to the old MDP remains valid.
+             */
+            void restartExploration() {
+                STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status.");
+                 // We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid.
+                exploredBeliefIds.clear();
+                exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false);
+                exploredMdpTransitions.clear();
+                exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices);
+                exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices();
+                mdpActionRewards.clear();
+                if (exploredMdp->hasRewardModel()) {
+                    // Can be overwritten during exploration
+                    mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector();
+                }
+                targetStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false);
+                truncatedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false);
+                mdpStatesToExplore.clear();
+
+                // The extra states are not changed
+                if (extraBottomState) {
+                    currentMdpState = extraBottomState.get();
+                    restoreOldBehaviorAtCurrentState(0);
+                }
+                if (extraTargetState) {
+                    currentMdpState = extraTargetState.get();
+                    restoreOldBehaviorAtCurrentState(0);
+                }
+                currentMdpState = noState();
+                
+                // Set up the initial state.
+                initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
+            }
     
             bool hasUnexploredState() const {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
-                return !beliefIdsToExplore.empty();
+                return !mdpStatesToExplore.empty();
             }
     
             BeliefId exploreNextState() {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
-                // Set up the matrix builder
-                finishCurrentRow();
-                startOfCurrentRowGroup = currentRowCount;
-                mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
-                ++currentRowCount;
                 
                 // Pop from the queue.
-                auto result = beliefIdsToExplore.front();
-                beliefIdsToExplore.pop_front();
-                return result;
+                currentMdpState = mdpStatesToExplore.front();
+                mdpStatesToExplore.pop_front();
+                
+                if (!currentStateHasOldBehavior()) {
+                    internalAddRowGroupIndex();
+                }
+                
+                return mdpStateToBeliefIdMap[currentMdpState];
             }
             
             void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero<ValueType>(), ValueType const& bottomStateValue = storm::utility::zero<ValueType>()) {
@@ -114,7 +151,7 @@ namespace storm {
                 // We first insert the entries of the current row in a separate map.
                 // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
                 
-                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
                 if (!storm::utility::isZero(bottomStateValue)) {
                     STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none.");
                     internalAddTransition(row, extraBottomState.get(), bottomStateValue);
@@ -127,7 +164,7 @@ namespace storm {
             
             void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one<ValueType>()) {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
-                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
                 internalAddTransition(row, getCurrentMdpState(), value);
             }
             
@@ -145,24 +182,24 @@ namespace storm {
                 // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
                 MdpStateType column;
                 if (ignoreNewBeliefs) {
-                    column = getMdpState(transitionTarget);
+                    column = getExploredMdpState(transitionTarget);
                     if (column == noState()) {
                         return false;
                     }
                 } else {
                     column = getOrAddMdpState(transitionTarget);
                 }
-                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
                 internalAddTransition(row, column, value);
                 return true;
             }
             
             void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero<ValueType>()) {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
-                if (currentRowCount >= mdpActionRewards.size()) {
-                    mdpActionRewards.resize(currentRowCount, storm::utility::zero<ValueType>());
+                if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) {
+                    mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
                 }
-                uint64_t row = startOfCurrentRowGroup + localActionIndex;
+                uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
                 mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward;
             }
             
@@ -178,11 +215,64 @@ namespace storm {
                 truncatedStates.set(getCurrentMdpState(), true);
             }
             
+            bool currentStateHasOldBehavior() {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates();
+            }
+            
+            /*!
+             * Inserts transitions and rewards at the given action as in the MDP of the previous exploration.
+             * Does NOT set whether the state is truncated and/or target.
+             * Will add "old" states that have not been considered before into the exploration queue
+             * @param localActionIndex the local index (within the current state's row group) of the action whose old behavior is restored
+             */
+            void restoreOldBehaviorAtCurrentState(uint64_t const& localActionIndex) {
+                STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any.");
+                uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex;
+                STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index.");
+                
+                // Insert the transitions
+                for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) {
+                    internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue());
+                    // Check whether exploration is needed
+                    auto beliefId = mdpStateToBeliefIdMap[transition.getColumn()];
+                    if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state
+                        if (!exploredBeliefIds.get(beliefId)) {
+                            // This belief needs exploration
+                            exploredBeliefIds.set(beliefId, true);
+                            mdpStatesToExplore.push_back(transition.getColumn());
+                        }
+                    }
+                }
+                
+                // Actually, nothing needs to be done for rewards since we already initialize the vector with the "old" values
+            }
+            
             void finishExploration() {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states.");
+                // Finish the last row grouping in case the last explored state was new
+                if (!currentStateHasOldBehavior()) {
+                    internalAddRowGroupIndex();
+                }
+                
                // Create the transition matrix
-                finishCurrentRow();
-                auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates());
+                uint64_t entryCount = 0;
+                for (auto const& row : exploredMdpTransitions) {
+                    entryCount += row.size();
+                }
+                storm::storage::SparseMatrixBuilder<ValueType> builder(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), entryCount, true, true, getCurrentNumberOfMdpStates());
+                for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
+                    uint64_t rowIndex = exploredChoiceIndices[groupIndex];
+                    uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
+                    builder.newRowGroup(rowIndex);
+                    for (; rowIndex < groupEnd; ++rowIndex) {
+                        for (auto const& entry : exploredMdpTransitions[rowIndex]) {
+                            builder.addNextValue(rowIndex, entry.first, entry.second);
+                        }
+                    }
+                }
+                auto mdpTransitionMatrix = builder.build();
                 
                 // Create a standard labeling
                 storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates());
@@ -212,13 +302,18 @@ namespace storm {
             }
             
             MdpStateType getCurrentNumberOfMdpStates() const {
-                STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status.");
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 return mdpStateToBeliefIdMap.size();
             }
     
             MdpStateType getCurrentNumberOfMdpChoices() const {
-                STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status.");
-                return currentRowCount;
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                return exploredMdpTransitions.size();
+            }
+    
+            MdpStateType getStartOfCurrentRowGroup() const {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                return exploredChoiceIndices.back();
             }
 
             ValueType getLowerValueBoundAtCurrentState() const {
@@ -291,7 +386,8 @@ namespace storm {
             }
             
             MdpStateType getCurrentMdpState() const {
-                return mdpTransitionsBuilder.getCurrentRowGroupCount() - 1;
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                return currentMdpState;
             }
             
             MdpStateType getCurrentBeliefId() const {
@@ -299,27 +395,20 @@ namespace storm {
             }
             
             void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) {
-                // We first insert the entries of the current row in a separate map.
-                // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
-                STORM_LOG_ASSERT(row >= currentRowCount - 1, "Trying to insert in an already completed row.");
-                if (row >= currentRowCount) {
-                    // We are going to start a new row, so insert the entries of the old one
-                    finishCurrentRow();
-                    currentRowCount = row + 1;
+                STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(), "Skipped at least one row.");
+                if (row == exploredMdpTransitions.size()) {
+                    exploredMdpTransitions.emplace_back();
                 }
-                STORM_LOG_ASSERT(mdpTransitionsBuilderCurrentRowEntries.count(column) == 0, "Trying to insert multiple transitions to the same state.");
-                mdpTransitionsBuilderCurrentRowEntries[column] = value;
+                STORM_LOG_ASSERT(exploredMdpTransitions[row].count(column) == 0, "Trying to insert multiple transitions to the same state.");
+                exploredMdpTransitions[row][column] = value;
             }
             
-            void finishCurrentRow() {
-                for (auto const& entry : mdpTransitionsBuilderCurrentRowEntries) {
-                    mdpTransitionsBuilder.addNextValue(currentRowCount - 1, entry.first, entry.second);
-                }
-                mdpTransitionsBuilderCurrentRowEntries.clear();
+            void internalAddRowGroupIndex() {
+                exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices());
             }
             
-            MdpStateType getMdpState(BeliefId const& beliefId) const {
-                if (beliefId < beliefIdsWithMdpState.size() && beliefIdsWithMdpState.get(beliefId)) {
+            MdpStateType getExploredMdpState(BeliefId const& beliefId) const {
+                if (beliefId < exploredBeliefIds.size() && exploredBeliefIds.get(beliefId)) {
                     return beliefIdToMdpStateMap.at(beliefId);
                 } else {
                     return noState();
@@ -336,20 +425,28 @@ namespace storm {
             }
             
             MdpStateType getOrAddMdpState(BeliefId const& beliefId) {
-                beliefIdsWithMdpState.grow(beliefId + 1, false);
-                if (beliefIdsWithMdpState.get(beliefId)) {
+                exploredBeliefIds.grow(beliefId + 1, false);
+                if (exploredBeliefIds.get(beliefId)) {
                     return beliefIdToMdpStateMap[beliefId];
                 } else {
-                    // Add a new MDP state
-                    beliefIdsWithMdpState.set(beliefId, true);
+                    // This state needs exploration
+                    exploredBeliefIds.set(beliefId, true);
+                    
+                    // If this is a restart of the exploration, we still might have an MDP state for the belief
+                    if (exploredMdp) {
+                        auto findRes = beliefIdToMdpStateMap.find(beliefId);
+                        if (findRes != beliefIdToMdpStateMap.end()) {
+                            mdpStatesToExplore.push_back(findRes->second);
+                            return findRes->second;
+                        }
+                    }
+                    // At this point we need to add a new MDP state
                     MdpStateType result = getCurrentNumberOfMdpStates();
                     assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size());
                     mdpStateToBeliefIdMap.push_back(beliefId);
                     beliefIdToMdpStateMap[beliefId] = result;
-                    // This new belief needs exploration
-                    beliefIdsToExplore.push_back(beliefId);
-                    
                     insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId));
+                    mdpStatesToExplore.push_back(result);
                     return result;
                 }
             }
@@ -358,15 +455,14 @@ namespace storm {
             std::shared_ptr<BeliefManagerType> beliefManager;
             std::vector<BeliefId> mdpStateToBeliefIdMap;
             std::map<BeliefId, MdpStateType> beliefIdToMdpStateMap;
-            storm::storage::BitVector beliefIdsWithMdpState;
+            storm::storage::BitVector exploredBeliefIds;
             
             // Exploration information
-            std::deque<uint64_t> beliefIdsToExplore;
-            storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder;
-            std::map<MdpStateType, ValueType> mdpTransitionsBuilderCurrentRowEntries;
+            std::deque<uint64_t> mdpStatesToExplore;
+            std::vector<std::map<MdpStateType, ValueType>> exploredMdpTransitions;
+            std::vector<MdpStateType> exploredChoiceIndices;
             std::vector<ValueType> mdpActionRewards;
-            uint64_t startOfCurrentRowGroup;
-            uint64_t currentRowCount;
+            uint64_t currentMdpState;
             
             // Special states during exploration
             boost::optional<MdpStateType> extraTargetState;
diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h
index 9cb7c039c..8f0dcd225 100644
--- a/src/storm-pomdp/storage/BeliefManager.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@@ -324,8 +324,8 @@ namespace storm {
             }
             
             std::map<BeliefId, ValueType> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
-                std::map<BeliefId, ValueType> destinations; // The belief ids should be ordered
-                // TODO: Does this make sense? It could be better to order them afterwards because now we rely on the fact that MDP states have the same order than their associated BeliefIds
+                std::map<BeliefId, ValueType> destinations;
+                // TODO: Output as vector?
                 
                 BeliefType belief = getBelief(beliefId);
                 

From 3041b881d44eb4e53f8c4a3dda7817456eadef81 Mon Sep 17 00:00:00 2001
From: Alexander Bork <alexander.bork@rwth-aachen.de>
Date: Wed, 1 Apr 2020 22:34:47 +0200
Subject: [PATCH 20/40] Beginning of dropUnreachableStates()

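The underlying idea (a hypothetical standalone version, not the Storm
graph utilities used in the patch): compute the set of states reachable
from the initial state, then restrict the transition matrix, labeling,
and all per-state data to that set.

    #include <cstdint>
    #include <deque>
    #include <vector>

    // Breadth-first search over an explicit successor-list representation
    std::vector<bool> reachable(std::vector<std::vector<uint64_t>> const& successors, uint64_t initial) {
        std::vector<bool> seen(successors.size(), false);
        std::deque<uint64_t> todo{initial};
        seen[initial] = true;
        while (!todo.empty()) {
            uint64_t s = todo.front();
            todo.pop_front();
            for (uint64_t t : successors[s]) {
                if (!seen[t]) {
                    seen[t] = true;
                    todo.push_back(t);
                }
            }
        }
        return seen;
    }
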
---
 src/storm-pomdp/builder/BeliefMdpExplorer.h | 41 +++++++++++++++++++--
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index e13e20cf3..bb53c61c6 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -287,20 +287,53 @@ namespace storm {
                 std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels;
                 if (!mdpActionRewards.empty()) {
                     mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
-                    mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards)));
+                    mdpRewardModels.emplace("default",
+                                            storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards)));
                 }
-                
+
                 storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels));
                 exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
                 status = Status::ModelFinished;
             }
-            
+
+            void dropUnreachableStates() {
+                STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status.");
+                storm::storage::BitVector reachableStates = storm::utility::graph::getReachableStates(getExploredMdp()->getTransitionMatrix(),
+                                                                                                      storm::storage::BitVector(getCurrentNumberOfMdpStates(), {initialMdpState}),
+                                                                                                      storm::storage::BitVector(getCurrentNumberOfMdpStates(), true), targetStates);
+                auto reachableTransitionMatrix = getExploredMdp()->getTransitionMatrix().getSubmatrix(true, reachableStates, reachableStates);
+                auto reachableStateLabeling = getExploredMdp()->getStateLabeling().getSubLabeling(reachableStates);
+                // TODO reward model
+                storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(reachableTransitionMatrix), std::move(reachableStateLabeling));
+                exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
+
+                // Reserve (rather than size-construct) these vectors as entries are appended via push_back below
+                std::vector<BeliefId> reachableMdpStateToBeliefIdMap; reachableMdpStateToBeliefIdMap.reserve(reachableStates.getNumberOfSetBits());
+                std::vector<ValueType> reachableLowerValueBounds; reachableLowerValueBounds.reserve(reachableStates.getNumberOfSetBits());
+                std::vector<ValueType> reachableUpperValueBounds; reachableUpperValueBounds.reserve(reachableStates.getNumberOfSetBits());
+                std::vector<ValueType> reachableValues; reachableValues.reserve(reachableStates.getNumberOfSetBits());
+                for (uint64_t state = 0; state < reachableStates.size(); ++state) {
+                    if (reachableStates[state]) {
+                        reachableMdpStateToBeliefIdMap.push_back(mdpStateToBeliefIdMap[state]);
+                        reachableLowerValueBounds.push_back(lowerValueBounds[state]);
+                        reachableUpperValueBounds.push_back(upperValueBounds[state]);
+                        reachableValues.push_back(values[state]);
+                    }
+                    //TODO drop BeliefIds from exploredBeliefIDs?
+                }
+                std::map<BeliefId, MdpStateType> reachableBeliefIdToMdpStateMap;
+                for (MdpStateType state = 0; state < reachableMdpStateToBeliefIdMap.size(); ++state) {
+                    reachableBeliefIdToMdpStateMap[reachableMdpStateToBeliefIdMap[state]] = state;
+                }
+                mdpStateToBeliefIdMap = reachableMdpStateToBeliefIdMap;
+                beliefIdToMdpStateMap = reachableBeliefIdToMdpStateMap;
+            }
+
             std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const {
                 STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status.");
                 STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet.");
                 return exploredMdp;
             }
-            
+
             MdpStateType getCurrentNumberOfMdpStates() const {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 return mdpStateToBeliefIdMap.size();

From 62c905fc583ef215d3dcd885181f0d99f816a5bc Mon Sep 17 00:00:00 2001
From: Alexander Bork <alexander.bork@rwth-aachen.de>
Date: Thu, 2 Apr 2020 20:05:00 +0200
Subject: [PATCH 21/40] Added basis for rewards in dropUnreachableStates()

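Restricting the reward model is slightly subtle because state-action
rewards are indexed by choice (matrix row), not by state. A sketch of
the intended lookup (restrictRewards and its parameters are illustrative
names, not Storm API):

    #include <cstdint>
    #include <vector>

    std::vector<double> restrictRewards(std::vector<double> const& rewards,
                                        std::vector<uint64_t> const& rowGroupIndices, // size = numStates + 1
                                        std::vector<bool> const& keep) {
        std::vector<double> result;
        for (uint64_t state = 0; state + 1 < rowGroupIndices.size(); ++state) {
            if (keep[state]) {
                // Copy all choice rewards of this state, using its row group offset
                for (uint64_t row = rowGroupIndices[state]; row < rowGroupIndices[state + 1]; ++row) {
                    result.push_back(rewards[row]);
                }
            }
        }
        return result;
    }
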
---
 src/storm-pomdp/builder/BeliefMdpExplorer.h | 28 +++++++++++++++------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index bb53c61c6..426eff188 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -298,28 +298,42 @@ namespace storm {
 
             void dropUnreachableStates() {
                 STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status.");
-                storm::storage::BitVector reachableStates = storm::utility::graph::getReachableStates(getExploredMdp()->getTransitionMatrix(),
-                                                                                                      storm::storage::BitVector(getCurrentNumberOfMdpStates(), {initialMdpState}),
-                                                                                                      storm::storage::BitVector(getCurrentNumberOfMdpStates(), true), targetStates);
+                auto reachableStates = storm::utility::graph::getReachableStates(getExploredMdp()->getTransitionMatrix(),
+                                                                                 storm::storage::BitVector(getCurrentNumberOfMdpStates(), std::vector<uint64_t>{initialMdpState}),
+                                                                                 storm::storage::BitVector(getCurrentNumberOfMdpStates(), true),
+                                                                                 getExploredMdp()->getStateLabeling().getStates("target"));
                 auto reachableTransitionMatrix = getExploredMdp()->getTransitionMatrix().getSubmatrix(true, reachableStates, reachableStates);
                 auto reachableStateLabeling = getExploredMdp()->getStateLabeling().getSubLabeling(reachableStates);
-                // TODO reward model
-                storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(reachableTransitionMatrix), std::move(reachableStateLabeling));
-                exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
-
                 // Reserve (rather than size-construct) these vectors as entries are appended via push_back below
                 std::vector<BeliefId> reachableMdpStateToBeliefIdMap; reachableMdpStateToBeliefIdMap.reserve(reachableStates.getNumberOfSetBits());
                 std::vector<ValueType> reachableLowerValueBounds; reachableLowerValueBounds.reserve(reachableStates.getNumberOfSetBits());
                 std::vector<ValueType> reachableUpperValueBounds; reachableUpperValueBounds.reserve(reachableStates.getNumberOfSetBits());
                 std::vector<ValueType> reachableValues; reachableValues.reserve(reachableStates.getNumberOfSetBits());
+                std::vector<ValueType> reachableMdpActionRewards;
                 for (uint64_t state = 0; state < reachableStates.size(); ++state) {
                     if (reachableStates[state]) {
                         reachableMdpStateToBeliefIdMap.push_back(mdpStateToBeliefIdMap[state]);
                         reachableLowerValueBounds.push_back(lowerValueBounds[state]);
                         reachableUpperValueBounds.push_back(upperValueBounds[state]);
                         reachableValues.push_back(values[state]);
+                        if (getExploredMdp()->hasRewardModel()) {
+                            // Action rewards are indexed by choice (row), not by state, so use the state's row group offset
+                            uint64_t rowGroupStart = getExploredMdp()->getTransitionMatrix().getRowGroupIndices()[state];
+                            for (uint64_t i = 0; i < getExploredMdp()->getTransitionMatrix().getRowGroupSize(state); ++i) {
+                                reachableMdpActionRewards.push_back(getExploredMdp()->getUniqueRewardModel().getStateActionRewardVector()[rowGroupStart + i]);
+                            }
+                        }
                     }
                     //TODO drop BeliefIds from exploredBeliefIDs?
                 }
+                std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels;
+                if (!reachableMdpActionRewards.empty()) {
+                    //reachableMdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
+                    mdpRewardModels.emplace("default",
+                                            storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(reachableMdpActionRewards)));
+                }
+                storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(reachableTransitionMatrix), std::move(reachableStateLabeling),
+                                                                                   std::move(mdpRewardModels));
+                exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
+
                 std::map<BeliefId, MdpStateType> reachableBeliefIdToMdpStateMap;
                 for (MdpStateType state = 0; state < reachableMdpStateToBeliefIdMap.size(); ++state) {
                     reachableBeliefIdToMdpStateMap[reachableMdpStateToBeliefIdMap[state]] = state;

From c2ddea14806cfabc81e1b956ced220129e6f463b Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Fri, 3 Apr 2020 12:41:55 +0200
Subject: [PATCH 22/40] First (re-) implementation of refinement. (probably
 needs some testing/debugging)

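The overall shape of the re-implemented loop (a pseudocode-style sketch
with illustrative names, shown for a maximizing property; not the Storm
API): alternate between refining the over- and under-approximation until
the gap between the two bounds is small enough.

    #include <cstdint>

    struct Bounds {
        double lowerBound, upperBound;
        double diff() const { return upperBound - lowerBound; }
    };

    template<typename OverApprox, typename UnderApprox>
    Bounds refineLoop(OverApprox& over, UnderApprox& under, double precision, uint64_t maxSteps) {
        Bounds res{0.0, 1.0};
        for (uint64_t step = 0; step < maxSteps && res.diff() > precision; ++step) {
            over.refine();   // e.g. increase observation resolutions
            under.refine();  // e.g. allow a larger belief MDP
            res.upperBound = over.valueAtInitialState();   // over-approx: upper bound when maximizing
            res.lowerBound = under.valueAtInitialState();  // under-approx: lower bound when maximizing
        }
        return res;
    }
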
---
 src/storm-pomdp/builder/BeliefMdpExplorer.h   |  60 +-
 .../ApproximatePOMDPModelchecker.cpp          | 712 +++++-------------
 .../ApproximatePOMDPModelchecker.h            | 141 +---
 3 files changed, 267 insertions(+), 646 deletions(-)

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index 426eff188..2a97c5e05 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -43,6 +43,10 @@ namespace storm {
             }
             BeliefMdpExplorer(BeliefMdpExplorer&& other) = default;
 
+            BeliefManagerType const& getBeliefManager() const {
+                return *beliefManager;
+            }
+            
             void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
                 status = Status::Exploring;
                 // Reset data from potential previous explorations
@@ -101,7 +105,7 @@ namespace storm {
                 exploredBeliefIds.clear();
                 exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false);
                 exploredMdpTransitions.clear();
-                exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices);
+                exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices());
                 exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices();
                 mdpActionRewards.clear();
                 if (exploredMdp->hasRewardModel()) {
@@ -235,7 +239,7 @@ namespace storm {
                 for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) {
                     internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue());
                     // Check whether exploration is needed
-                    auto beliefId = mdpStateToBeliefIdMap[transition.getColumn()];
+                    auto beliefId = getBeliefId(transition.getColumn());
                     if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state
                         if (!exploredBeliefIds.get(beliefId)) {
                             // This belief needs exploration
@@ -397,6 +401,10 @@ namespace storm {
                 status = Status::ModelChecked;
             }
             
+            bool hasComputedValues() const {
+                return status == Status::ModelChecked;
+            }
+            
             std::vector<ValueType> const& getValuesOfExploredMdp() const {
                 STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
                 return values;
@@ -408,6 +416,51 @@ namespace storm {
                 return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)];
             }
             
+            MdpStateType getBeliefId(MdpStateType exploredMdpState) const {
+                STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status.");
+                return mdpStateToBeliefIdMap[exploredMdpState];
+            }
+            
+            struct SuccessorObservationInformation {
+                SuccessorObservationInformation(ValueType const& obsProb, ValueType const& maxProb, uint64_t const& count) : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) {
+                    // Intentionally left empty.
+                }
+                
+                void join(SuccessorObservationInformation const& other) {
+                    observationProbability += other.observationProbability;
+                    maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs);
+                    successorWithObsCount += other.successorWithObsCount;
+                }
+                
+                ValueType observationProbability; /// The probability we move to the corresponding observation.
+                ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation.
+                uint64_t successorWithObsCount; /// The number of successors with this observation.
+            };
+            
+            void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior");
+                uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex;
+                gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
+            }
+            
+            void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) {
+                STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before");
+                for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) {
+                    auto const& beliefId = getBeliefId(entry.getColumn());
+                    if (beliefId != beliefManager->noId()) {
+                        auto const& obs = beliefManager->getBeliefObservation(beliefId);
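+                        // Each transition contributes its probability to both the total observation probability and the maximum, with successor count 1.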
+                        SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1);
+                        auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info);
+                        if (!obsInsertion.second) {
+                            // There already is an entry for this observation, so merge the two pieces of information
+                            obsInsertion.first->second.join(info);
+                        }
+                    }
+                }
+            }
+            
         private:
             MdpStateType noState() const {
                 return std::numeric_limits<MdpStateType>::max();
@@ -438,7 +491,8 @@ namespace storm {
             }
             
             MdpStateType getCurrentBeliefId() const {
-                return mdpStateToBeliefIdMap[getCurrentMdpState()];
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                return getBeliefId(getCurrentMdpState());
             }
             
             void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) {
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 436fc3e09..4526607d4 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -171,8 +171,9 @@ namespace storm {
                     if (rewardModelName) {
                         manager->setRewardModel(rewardModelName);
                     }
-                    auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager);
-                    if (approx) {
+                    auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
+                    buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, manager, approx);
+                    if (approx->hasComputedValues()) {
                         STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n");
                         approx->getExploredMdp()->printModelInformationToStream(std::cout);
                         ValueType& resultValue = min ? result.lowerBound : result.upperBound;
@@ -185,8 +186,9 @@ namespace storm {
                     if (rewardModelName) {
                         manager->setRewardModel(rewardModelName);
                     }
-                    auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager);
-                    if (approx) {
+                    auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
+                    buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx);
+                    if (approx->hasComputedValues()) {
                         STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n");
                         approx->getExploredMdp()->printModelInformationToStream(std::cout);
                         ValueType& resultValue = min ? result.upperBound : result.lowerBound;
@@ -200,23 +202,27 @@ namespace storm {
                 
                 // Set up exploration data
                 std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-                auto beliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
+                auto overApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
+                auto underApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
                 if (rewardModelName) {
-                    beliefManager->setRewardModel(rewardModelName);
+                    overApproxBeliefManager->setRewardModel(rewardModelName);
+                    underApproxBeliefManager->setRewardModel(rewardModelName);
                 }
                 
                 // Over-approximation
-                auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager);
-                if (!overApproximation) {
+                auto overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+                buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, overApproxBeliefManager, overApproximation);
+                if (!overApproximation->hasComputedValues()) {
                     return;
                 }
                 ValueType& overApproxValue = min ? result.lowerBound : result.upperBound;
                 overApproxValue = overApproximation->getComputedValueAtInitialState();
                 
-                // UnderApproximation TODO: use same belief manager?)
-                uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates();
-                auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager);
-                if (!underApproximation) {
+                // UnderApproximation
+                uint64_t underApproxSizeThreshold = std::max<uint64_t>(overApproximation->getExploredMdp()->getNumberOfStates(), 10);
+                auto underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+                buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
+                if (!underApproximation->hasComputedValues()) {
                     return;
                 }
                 ValueType& underApproxValue = min ? result.upperBound : result.lowerBound;
@@ -225,379 +231,165 @@ namespace storm {
                 // ValueType lastMinScore = storm::utility::infinity<ValueType>();
                 // Start refinement
                 statistics.refinementSteps = 0;
+                ValueType refinementAggressiveness = storm::utility::zero<ValueType>();
                 while (result.diff() > options.refinementPrecision) {
                     if (storm::utility::resources::isTerminate()) {
                         break;
                     }
-                    // TODO the actual refinement
-                    /*
-                    // choose which observation(s) to refine
-                    std::vector<ValueType> obsAccumulator(pomdp.getNrObservations(), storm::utility::zero<ValueType>());
-                    std::vector<uint64_t> beliefCount(pomdp.getNrObservations(), 0);
-                    bsmap_type::right_map::const_iterator underApproxStateBeliefIter = res->underApproxBeliefStateMap.right.begin();
-                    while (underApproxStateBeliefIter != res->underApproxBeliefStateMap.right.end()) {
-                        auto currentBelief = res->beliefList[underApproxStateBeliefIter->second];
-                        beliefCount[currentBelief.observation] += 1;
-                        bsmap_type::left_const_iterator overApproxBeliefStateIter = res->overApproxBeliefStateMap.left.find(underApproxStateBeliefIter->second);
-                        if (overApproxBeliefStateIter != res->overApproxBeliefStateMap.left.end()) {
-                            // If there is an over-approximate value for the belief, use it
-                            auto diff = res->overApproxMap[overApproxBeliefStateIter->second] - res->underApproxMap[underApproxStateBeliefIter->first];
-                            obsAccumulator[currentBelief.observation] += diff;
-                        } else {
-                            //otherwise, we approximate a value TODO this is critical, we have to think about it
-                            auto overApproxValue = storm::utility::zero<ValueType>();
-                            auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation], pomdp.getNumberOfStates());
-                            auto subSimplex = temp.first;
-                            auto lambdas = temp.second;
-                            for (size_t j = 0; j < lambdas.size(); ++j) {
-                                if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
-                                    uint64_t approxId = getBeliefIdInVector(res->beliefList, currentBelief.observation, subSimplex[j]);
-                                    bsmap_type::left_const_iterator approxIter = res->overApproxBeliefStateMap.left.find(approxId);
-                                    if (approxIter != res->overApproxBeliefStateMap.left.end()) {
-                                        overApproxValue += lambdas[j] * res->overApproxMap[approxIter->second];
-                                    } else {
-                                        overApproxValue += lambdas[j];
-                                    }
-                                }
-                            }
-                            obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[underApproxStateBeliefIter->first];
-                        }
-                        ++underApproxStateBeliefIter;
-                    }
-
-
-                    //for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
-                       // obsAccumulator[i] /= storm::utility::convertNumber<ValueType>(beliefCount[i]);
-                    //}
-                    changedObservations.clear();
-
-                    //TODO think about some other scoring methods
-                    auto maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end());
-                    //if (cc.isEqual(maxAvgDifference, lastMinScore) || cc.isLess(lastMinScore, maxAvgDifference)) {
-                    lastMinScore = maxAvgDifference;
-                    auto maxRes = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
-                    STORM_PRINT("Set all to " << maxRes + 1 << std::endl)
-                    for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
-                        observationResolutionVector[i] = maxRes + 1;
-                        changedObservations.insert(i);
+                    
+                    // Refine over-approximation
+                    refinementAggressiveness *= storm::utility::convertNumber<ValueType>(1.1);
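+                    // Note: buildOverApproximation raises the aggressiveness to at least the minimal observation rating,
+                    // so it becomes positive after the first refinement step even though it starts at zero.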
+                    buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, &refinementAggressiveness, observationResolutionVector, overApproxBeliefManager, overApproximation);
+                    if (overApproximation->hasComputedValues()) {
+                        overApproxValue = overApproximation->getComputedValueAtInitialState();
+                    } else {
+                        break;
                     }
-                    //} else {
-                   //     lastMinScore = std::min(maxAvgDifference, lastMinScore);
-                   //     STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
-                   //     STORM_PRINT("Last Min Score: " << lastMinScore << std::endl)
-                   //     //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl)
-                    //    for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
-                            //STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i])
-                   //         if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) {
-                                //STORM_PRINT(" *** ")
-                  //              observationResolutionVector[i] += 1;
-                   //             changedObservations.insert(i);
-                    //        }
-                            //STORM_PRINT(std::endl)
-                    //    }
-                    //}
-                    if (underApproxModelSize < std::numeric_limits<uint64_t>::max() - 101) {
-                        underApproxModelSize += 100;
+                    
+                    // Refine under-approximation
+                    underApproxSizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(storm::utility::convertNumber<ValueType>(underApproxSizeThreshold) * (storm::utility::one<ValueType>() + refinementAggressiveness));
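+                    // Grow the threshold with the aggressiveness, but never let it drop below the size of the refined over-approximation.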
+                    underApproxSizeThreshold = std::max<uint64_t>(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates());
+                    buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
+                    if (underApproximation->hasComputedValues()) {
+                        underApproxValue = underApproximation->getComputedValueAtInitialState();
+                    } else {
+                        break;
                     }
-                    STORM_PRINT(
-                            "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl)
-                    res = computeRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
-                                                res, changedObservations, initialOverApproxMap, initialUnderApproxMap, underApproxModelSize);
-                    //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot");
-                    STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) ||
-                                         cc.isEqual(res->underApproxValue, res->overApproxValue),
-                                         "The value for the under-approximation is larger than the value for the over-approximation.");
-                    */
                     ++statistics.refinementSteps.get();
                 }
             }
 
+            /*!
+             * Heuristically rates the quality of the approximation described by the given successor observation info.
+             * Here, 0 means a bad approximation and 1 means a good approximation.
+             */
+            template<typename PomdpModelType, typename BeliefValueType>
+            typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info) {
+                auto n = storm::utility::convertNumber<ValueType>(info.successorWithObsCount);
+                auto one = storm::utility::one<ValueType>();
+                if (storm::utility::isOne(n)) {
+                    // A single successor with this observation means that the belief is approximated precisely.
+                    return one;
+                }
+                // Create the actual rating for this observation at this choice from the given info
+                ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
+                // At this point, obsChoiceRating is the largest triangulation weight (which ranges from 1/n to 1).
+                // Normalize the rating so that it ranges from 0 to 1, where
+                // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
+                obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
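+                // Example: n = 4 successors with largest weight 1/4 yield the rating (1/4 * 4 - 1) / (4 - 1) = 0,
+                // i.e. the belief lies exactly in the middle of the triangulating simplex.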
+                return obsChoiceRating;
+            }
+            
+            template<typename PomdpModelType, typename BeliefValueType>
+            std::vector<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation) {
+                uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices();
+
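+                // Initialize with the best possible rating (one); each choice can only lower the rating of its successor observations.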
+                std::vector<ValueType> resultingRatings(pomdp.getNrObservations(), storm::utility::one<ValueType>());
+                
+                std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
+                for (uint64_t mdpChoice = 0; mdpChoice < numMdpChoices; ++mdpChoice) {
+                    gatheredSuccessorObservations.clear();
+                    overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
+                    for (auto const& obsInfo : gatheredSuccessorObservations) {
+                        auto const& obs = obsInfo.first;
+                        ValueType obsChoiceRating = rateObservation(obsInfo.second);
+ 
+                        // The rating of the observation will be the minimum over all choice-based observation ratings
+                        resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
+                    }
+                }
+                return resultingRatings;
+            }
+            
             template<typename PomdpModelType, typename BeliefValueType>
-            std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager) {
+            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
+                STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given");
+                STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero<ValueType>(), "Cannot refine with negative aggressiveness.");
+                STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one<ValueType>(), "Refinement with aggressiveness > 1 is invalid.");
+                
                 statistics.overApproximationBuildTime.start();
-                storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
-                if (computeRewards) {
-                    explorer.startNewExploration(storm::utility::zero<ValueType>());
+                storm::storage::BitVector refinedObservations;
+                if (!refine) {
+                    // If we build the model from scratch, we first have to set up the explorer for the over-approximation.
+                    if (computeRewards) {
+                        overApproximation->startNewExploration(storm::utility::zero<ValueType>());
+                    } else {
+                        overApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
+                    }
                 } else {
-                    explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
+                    // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement.
+                    auto obsRatings = getObservationRatings(overApproximation);
+                    ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end());
+                    // Potentially increase the aggressiveness so that at least one observation actually gets refinement.
+                    *refinementAggressiveness = std::max(minRating, *refinementAggressiveness);
+                    refinedObservations = storm::utility::vector::filter<ValueType>(obsRatings, [&refinementAggressiveness](ValueType const& r) { return r <= *refinementAggressiveness;});
+                    STORM_PRINT("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
+                    for (auto const& obs : refinedObservations) {
+                        // Heuristically increment the resolution at the refined observations (also based on the refinementAggressiveness)
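+                        // (e.g., aggressiveness 0.5 at resolution 12 adds ceil(1 + 0.5 * 12) = 7 grid points for this observation)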
+                        ValueType incrementValue = storm::utility::one<ValueType>() + (*refinementAggressiveness) * storm::utility::convertNumber<ValueType>(observationResolutionVector[obs]);
+                        observationResolutionVector[obs] += storm::utility::convertNumber<uint64_t>(storm::utility::ceil(incrementValue));
+                    }
+                    overApproximation->restartExploration();
                 }
                 
-                // Expand the beliefs to generate the grid on-the-fly
-                while (explorer.hasUnexploredState()) {
-                    uint64_t currId = explorer.exploreNextState();
+                // Start exploration
+                std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
+                while (overApproximation->hasUnexploredState()) {
+                    uint64_t currId = overApproximation->exploreNextState();
                     
                     uint32_t currObservation = beliefManager->getBeliefObservation(currId);
                     if (targetObservations.count(currObservation) != 0) {
-                        explorer.setCurrentStateIsTarget();
-                        explorer.addSelfloopTransition();
+                        overApproximation->setCurrentStateIsTarget();
+                        overApproximation->addSelfloopTransition();
                     } else {
                         bool stopExploration = false;
-                        if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
+                        if (storm::utility::abs<ValueType>(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
                             stopExploration = true;
-                            explorer.setCurrentStateIsTruncated();
+                            overApproximation->setCurrentStateIsTruncated();
                         }
                         for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
-                            ValueType truncationProbability = storm::utility::zero<ValueType>();
-                            ValueType truncationValueBound = storm::utility::zero<ValueType>();
-                            auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
-                            for (auto const& successor : successorGridPoints) {
-                                bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
-                                if (!added) {
-                                    STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
-                                    // We did not explore this successor state. Get a bound on the "missing" value
-                                    truncationProbability += successor.second;
-                                    truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first));
-                                }
-                            }
-                            if (stopExploration) {
-                                if (computeRewards) {
-                                    explorer.addTransitionsToExtraStates(action, truncationProbability);
-                                } else {
-                                    explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
-                                }
-                            }
-                            if (computeRewards) {
-                                // The truncationValueBound will be added on top of the reward introduced by the current belief state.
-                                explorer.computeRewardAtCurrentState(action, truncationValueBound);
-                            }
-                        }
-                    }
-                    if (storm::utility::resources::isTerminate()) {
-                        statistics.overApproximationBuildAborted = true;
-                        break;
-                    }
-                }
-                statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates();
-                if (storm::utility::resources::isTerminate()) {
-                    statistics.overApproximationBuildTime.stop();
-                    return nullptr;
-                }
-                
-                explorer.finishExploration();
-                statistics.overApproximationBuildTime.stop();
-
-                statistics.overApproximationCheckTime.start();
-                explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
-                statistics.overApproximationCheckTime.stop();
-                
-                return std::make_shared<ExplorerType>(std::move(explorer));
-            }
-
-            template<typename PomdpModelType, typename BeliefValueType>
-            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
-            /*TODO:
-                       template<typename PomdpModelType, typename BeliefValueType>
-            std::shared_ptr<RefinementComponents<ValueType>>
-            ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
-                                                                                            std::vector<uint64_t> &observationResolutionVector,
-                                                                                            bool computeRewards,
-                                                                                            std::shared_ptr<RefinementComponents<ValueType>> refinementComponents,
-                                                                                            std::set<uint32_t> changedObservations,
-                                                                                            boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-                                                                                            boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
-                                                                                            uint64_t maxUaModelSize) {
-                bool initialBoundMapsSet = overApproximationMap && underApproximationMap;
-                std::map<uint64_t, ValueType> initialOverMap;
-                std::map<uint64_t, ValueType> initialUnderMap;
-                if (initialBoundMapsSet) {
-                    initialOverMap = overApproximationMap.value();
-                    initialUnderMap = underApproximationMap.value();
-                }
-                // Note that a persistent cache is not support by the current data structure. The resolution for the given belief also has to be stored somewhere to cache effectively
-                std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> subSimplexCache;
-                std::map<uint64_t, std::vector<ValueType>> lambdaCache;
-
-                // Map to save the weighted values resulting from the initial preprocessing for newly added beliefs / indices in beliefSpace
-                std::map<uint64_t, ValueType> weightedSumOverMap;
-                std::map<uint64_t, ValueType> weightedSumUnderMap;
-
-                statistics.overApproximationBuildTime.start();
-
-                uint64_t nextBeliefId = refinementComponents->beliefList.size();
-                uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates();
-                std::set<uint64_t> relevantStates; // The MDP states where the observation has changed
-                for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) {
-                    auto currentBelief = refinementComponents->beliefList[iter.first];
-                    if (changedObservations.find(currentBelief.observation) != changedObservations.end()) {
-                        relevantStates.insert(iter.second);
-                    }
-                }
-
-                std::set<std::pair<uint64_t, uint64_t>> statesAndActionsToCheck; // The predecessors of states where the observation has changed
-                for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) {
-                    for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) {
-                        for (typename storm::storage::SparseMatrix<ValueType>::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(
-                                state, row).begin();
-                             itEntry != refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(state, row).end(); ++itEntry) {
-                            if (relevantStates.find(itEntry->getColumn()) != relevantStates.end()) {
-                                statesAndActionsToCheck.insert(std::make_pair(state, row));
-                                break;
-                            }
-                        }
-                    }
-                }
-
-                std::deque<uint64_t> beliefsToBeExpanded;
-
-                std::map<std::pair<uint64_t, uint64_t>, std::map<uint64_t, ValueType>> transitionsStateActionPair;
-                for (auto const &stateActionPair : statesAndActionsToCheck) {
-                    auto currId = refinementComponents->overApproxBeliefStateMap.right.at(stateActionPair.first);
-                    auto action = stateActionPair.second;
-                    std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId],
-                                                                                                                              action);
-                    std::map<uint64_t, ValueType> transitionInActionBelief;
-                    for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
-                        // Expand and triangulate the successor
-                        uint32_t observation = iter->first;
-                        uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget,
-                                                                                   targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId);
-                        nextBeliefId = refinementComponents->beliefList.size();
-                        //Triangulate here and put the possibly resulting belief in the grid
-                        std::vector<std::map<uint64_t, ValueType>> subSimplex;
-                        std::vector<ValueType> lambdas;
-                        //TODO add caching
-                        if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) {
-                            subSimplex = subSimplexCache[idNextBelief];
-                            lambdas = lambdaCache[idNextBelief];
-                        } else {
-                            auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities,
-                                                                    observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation],
-                                                                    pomdp.getNumberOfStates());
-                            subSimplex = temp.first;
-                            lambdas = temp.second;
-                            if (options.cacheSubsimplices) {
-                                subSimplexCache[idNextBelief] = subSimplex;
-                                lambdaCache[idNextBelief] = lambdas;
-                            }
-                        }
-                        for (size_t j = 0; j < lambdas.size(); ++j) {
-                            if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
-                                auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]);
-                                if (approxId == uint64_t(-1)) {
-                                    // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list
-                                    storm::pomdp::Belief<ValueType> gridBelief = {nextBeliefId, observation, subSimplex[j]};
-                                    refinementComponents->beliefList.push_back(gridBelief);
-                                    refinementComponents->beliefGrid.push_back(gridBelief);
-                                    refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
-                                    // compute overapproximate value using MDP result map
-                                    if (initialBoundMapsSet) {
-                                        auto tempWeightedSumOver = storm::utility::zero<ValueType>();
-                                        auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
-                                        for (uint64_t i = 0; i < subSimplex[j].size(); ++i) {
-                                            tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(initialOverMap[i]);
-                                            tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(initialUnderMap[i]);
-                                        }
-                                        weightedSumOverMap[nextBeliefId] = tempWeightedSumOver;
-                                        weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder;
+                            // Check whether we expand this state/action pair
+                            // We always expand if we are not doing refinement or if the state was not available in the "old" MDP.
+                            // Otherwise, a heuristic decides.
+                            bool expandStateAction = true;
+                            if (refine && overApproximation->currentStateHasOldBehavior()) {
+                                // Compute a rating of the current state/action pair
+                                ValueType stateActionRating = storm::utility::one<ValueType>();
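+                                // The rating of the state/action pair is the minimum over the ratings of its refined successor observations.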
+                                gatheredSuccessorObservations.clear();
+                                overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations);
+                                for (auto const& obsInfo : gatheredSuccessorObservations) {
+                                    if (refinedObservations.get(obsInfo.first)) {
+                                        ValueType obsRating = rateObservation(obsInfo.second);
+                                        stateActionRating = std::min(stateActionRating, obsRating);
                                     }
-                                    beliefsToBeExpanded.push_back(nextBeliefId);
-                                    refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId));
-                                    transitionInActionBelief[nextStateId] = iter->second * lambdas[j];
-                                    ++nextBeliefId;
-                                    ++nextStateId;
-                                } else {
-                                    transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j];
                                 }
+                                // Only expand this state/action pair if its rating is below twice the refinementAggressiveness
+                                expandStateAction = stateActionRating < storm::utility::convertNumber<ValueType>(2.0) * (*refinementAggressiveness);
                             }
-                        }
-                    }
-                    if (!transitionInActionBelief.empty()) {
-                        transitionsStateActionPair[stateActionPair] = transitionInActionBelief;
-                    }
-                }
-
-                std::set<uint64_t> stoppedExplorationStateSet;
-
-                // Expand newly added beliefs
-                while (!beliefsToBeExpanded.empty()) {
-                    uint64_t currId = beliefsToBeExpanded.front();
-                    beliefsToBeExpanded.pop_front();
-                    bool isTarget = refinementComponents->beliefIsTarget[currId];
-
-                    if (initialBoundMapsSet &&
-                        cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber<ValueType>(options.explorationThreshold))) {
-                        STORM_PRINT("Stop Exploration in State " << refinementComponents->overApproxBeliefStateMap.left.at(currId) << " with Value " << weightedSumOverMap[currId]
-                                                                 << std::endl)
-                        transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = {{1, weightedSumOverMap[currId]},
-                                                                                                                                         {0, storm::utility::one<ValueType>() -
-                                                                                                                                             weightedSumOverMap[currId]}};
-                        stoppedExplorationStateSet.insert(refinementComponents->overApproxBeliefStateMap.left.at(currId));
-                        continue;
-                    }
-
-                    if (isTarget) {
-                        // Depending on whether we compute rewards, we select the right initial result
-                        // MDP stuff
-                        transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] =
-                                {{refinementComponents->overApproxBeliefStateMap.left.at(currId), storm::utility::one<ValueType>()}};
-                    } else {
-                        uint64_t representativeState = pomdp.getStatesWithObservation(refinementComponents->beliefList[currId].observation).front();
-                        uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
-                        std::vector<ValueType> actionRewardsInState(numChoices);
-
-                        for (uint64_t action = 0; action < numChoices; ++action) {
-                            std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], action);
-                            std::map<uint64_t, ValueType> transitionInActionBelief;
-                            for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
-                                uint32_t observation = iter->first;
-                                // THIS CALL IS SLOW
-                                // TODO speed this up
-                                uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget,
-                                                                                           targetObservations, refinementComponents->beliefList[currId], action, observation,
-                                                                                           nextBeliefId);
-                                nextBeliefId = refinementComponents->beliefList.size();
-                                //Triangulate here and put the possibly resulting belief in the grid
-                                std::vector<std::map<uint64_t, ValueType>> subSimplex;
-                                std::vector<ValueType> lambdas;
-
-                                if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) {
-                                    subSimplex = subSimplexCache[idNextBelief];
-                                    lambdas = lambdaCache[idNextBelief];
-                                } else {
-                                    auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities,
-                                                                            observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation],
-                                                                            pomdp.getNumberOfStates());
-                                    subSimplex = temp.first;
-                                    lambdas = temp.second;
-                                    if (options.cacheSubsimplices) {
-                                        subSimplexCache[idNextBelief] = subSimplex;
-                                        lambdaCache[idNextBelief] = lambdas;
+                            if (expandStateAction) {
+                                ValueType truncationProbability = storm::utility::zero<ValueType>();
+                                ValueType truncationValueBound = storm::utility::zero<ValueType>();
+                                auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
+                                for (auto const& successor : successorGridPoints) {
+                                    bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration);
+                                    if (!added) {
+                                        STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
+                                        // We did not explore this successor state. Get a bound on the "missing" value
+                                        truncationProbability += successor.second;
+                                        truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first) : overApproximation->computeUpperValueBoundAtBelief(successor.first));
                                     }
                                 }
-
-                                for (size_t j = 0; j < lambdas.size(); ++j) {
-                                    if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
-                                        auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]);
-                                        if (approxId == uint64_t(-1)) {
-                                            // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list
-                                            storm::pomdp::Belief<ValueType> gridBelief = {nextBeliefId, observation, subSimplex[j]};
-                                            refinementComponents->beliefList.push_back(gridBelief);
-                                            refinementComponents->beliefGrid.push_back(gridBelief);
-                                            refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
-                                            // compute overapproximate value using MDP result map
-                                            if (initialBoundMapsSet) {
-                                                auto tempWeightedSumOver = storm::utility::zero<ValueType>();
-                                                auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
-                                                for (uint64_t i = 0; i < subSimplex[j].size(); ++i) {
-                                                    tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(initialOverMap[i]);
-                                                    tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(initialUnderMap[i]);
-                                                }
-                                                weightedSumOverMap[nextBeliefId] = tempWeightedSumOver;
-                                                weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder;
-                                            }
-                                            beliefsToBeExpanded.push_back(nextBeliefId);
-                                            refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId));
-                                            transitionInActionBelief[nextStateId] = iter->second * lambdas[j];
-                                            ++nextBeliefId;
-                                            ++nextStateId;
-                                        } else {
-                                            transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j];
-                                        }
+                                if (stopExploration) {
+                                    if (computeRewards) {
+                                        overApproximation->addTransitionsToExtraStates(action, truncationProbability);
+                                    } else {
+                                        overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
                                     }
                                 }
-                            }
-                            if (!transitionInActionBelief.empty()) {
-                                transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), action)] = transitionInActionBelief;
+                                if (computeRewards) {
+                                    // The truncationValueBound will be added on top of the reward introduced by the current belief state.
+                                    overApproximation->computeRewardAtCurrentState(action, truncationValueBound);
+                                }
+                            } else {
+                                // Do not refine here
+                                overApproximation->restoreOldBehaviorAtCurrentState(action);
                             }
                         }
                     }
@@ -606,173 +398,85 @@ namespace storm {
                         break;
                     }
                 }
-
-                statistics.overApproximationStates = nextStateId;
+                // TODO: Drop unreachable states (sometimes?)
+                statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates();
                 if (storm::utility::resources::isTerminate()) {
                     statistics.overApproximationBuildTime.stop();
-                    // Return the result from the old refinement step
-                    return refinementComponents;
-                }
-                storm::models::sparse::StateLabeling mdpLabeling(nextStateId);
-                mdpLabeling.addLabel("init");
-                mdpLabeling.addLabel("target");
-                mdpLabeling.addLabelToState("init", refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId));
-                mdpLabeling.addLabelToState("target", 1);
-                uint_fast64_t currentRow = 0;
-                uint_fast64_t currentRowGroup = 0;
-                storm::storage::SparseMatrixBuilder<ValueType> smb(0, nextStateId, 0, false, true);
-                auto oldTransitionMatrix = refinementComponents->overApproxModelPtr->getTransitionMatrix();
-                smb.newRowGroup(currentRow);
-                smb.addNextValue(currentRow, 0, storm::utility::one<ValueType>());
-                ++currentRow;
-                ++currentRowGroup;
-                smb.newRowGroup(currentRow);
-                smb.addNextValue(currentRow, 1, storm::utility::one<ValueType>());
-                ++currentRow;
-                ++currentRowGroup;
-                for (uint64_t state = 2; state < nextStateId; ++state) {
-                    smb.newRowGroup(currentRow);
-                    //STORM_PRINT("Loop State: " << state << std::endl)
-                    uint64_t numChoices = pomdp.getNumberOfChoices(
-                            pomdp.getStatesWithObservation(refinementComponents->beliefList[refinementComponents->overApproxBeliefStateMap.right.at(state)].observation).front());
-                    bool isTarget = refinementComponents->beliefIsTarget[refinementComponents->overApproxBeliefStateMap.right.at(state)];
-                    for (uint64_t action = 0; action < numChoices; ++action) {
-                        if (transitionsStateActionPair.find(std::make_pair(state, action)) == transitionsStateActionPair.end()) {
-                            for (auto const &entry : oldTransitionMatrix.getRow(state, action)) {
-                                smb.addNextValue(currentRow, entry.getColumn(), entry.getValue());
-                            }
-                        } else {
-                            for (auto const &iter : transitionsStateActionPair[std::make_pair(state, action)]) {
-                                smb.addNextValue(currentRow, iter.first, iter.second);
-                            }
-                        }
-                        ++currentRow;
-                        if (isTarget) {
-                            // If the state is a target, we only have one action, thus we add the target label and stop the iteration
-                            mdpLabeling.addLabelToState("target", state);
-                            break;
-                        }
-                        if (stoppedExplorationStateSet.find(state) != stoppedExplorationStateSet.end()) {
-                            break;
-                        }
-                    }
-                    ++currentRowGroup;
-                }
-                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(smb.build(), mdpLabeling);
-                storm::models::sparse::Mdp<ValueType, RewardModelType> overApproxMdp(modelComponents);
-                if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
-                    for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) {
-                        auto currentBelief = refinementComponents->beliefList[iter.first];
-                        auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
-                        for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) {
-                            // Add the reward
-                            mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
-                                                                getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
-                                                                                     currentBelief));
-                        }
-                    }
-                    overApproxMdp.addRewardModel("std", mdpRewardModel);
-                    overApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
+                    return;
                 }
-                overApproxMdp.printModelInformationToStream(std::cout);
-                statistics.overApproximationBuildTime.stop();
-                STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
                 
-                auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
-                auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
-                std::string propertyString = computeRewards ? "R" : "P";
-                propertyString += min ? "min" : "max";
-                propertyString += "=? [F \"target\"]";
-                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
-                std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
-                auto task = storm::api::createTask<ValueType>(property, false);
+                overApproximation->finishExploration();
+                statistics.overApproximationBuildTime.stop();
+
                 statistics.overApproximationCheckTime.start();
-                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
+                overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                 statistics.overApproximationCheckTime.stop();
-                if (storm::utility::resources::isTerminate() && !res) {
-                    return refinementComponents; // Return the result from the previous iteration
-                }
-                STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << std::endl)
-                STORM_LOG_ASSERT(res, "Result not exist.");
-                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true)));
-                auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                auto overApprox = overApproxResultMap[refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)];
-
-                //auto underApprox = weightedSumUnderMap[initialBelief.id];
-                auto underApproxComponents = computeUnderapproximation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations,
-                                                                       refinementComponents->initialBeliefId, min, computeRewards, maxUaModelSize);
-                STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
-                if (storm::utility::resources::isTerminate() && !underApproxComponents) {
-                    return std::make_unique<RefinementComponents<ValueType>>(
-                        RefinementComponents<ValueType>{modelPtr, overApprox, refinementComponents->underApproxValue, overApproxResultMap, {}, refinementComponents->beliefList, refinementComponents->beliefGrid, refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, {}, refinementComponents->initialBeliefId});
-                }
-                STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
-
-                return std::make_shared<RefinementComponents<ValueType>>(
-                        RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
-                                                        underApproxComponents->underApproxMap, refinementComponents->beliefList, refinementComponents->beliefGrid,
-                                                        refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap,
-                                                        underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId});
-            }
-            */
             }
 
             template<typename PomdpModelType, typename BeliefValueType>
-            std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager) {
+            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
                 
                 statistics.underApproximationBuildTime.start();
-                storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
-                if (computeRewards) {
-                    explorer.startNewExploration(storm::utility::zero<ValueType>());
+                if (!underApproximation->hasComputedValues()) {
+                    // Build a new under approximation
+                    if (computeRewards) {
+                        underApproximation->startNewExploration(storm::utility::zero<ValueType>());
+                    } else {
+                        underApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
+                    }
                 } else {
-                    explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
+                    // Restart the building process
+                    underApproximation->restartExploration();
                 }
                 
-                // Expand the beliefs to generate the grid on-the-fly
-                if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
-                    STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
-                }
-                while (explorer.hasUnexploredState()) {
-                    uint64_t currId = explorer.exploreNextState();
+                // Expand the beliefs
+                while (underApproximation->hasUnexploredState()) {
+                    uint64_t currId = underApproximation->exploreNextState();
                     
                     uint32_t currObservation = beliefManager->getBeliefObservation(currId);
                     if (targetObservations.count(currObservation) != 0) {
-                        explorer.setCurrentStateIsTarget();
-                        explorer.addSelfloopTransition();
+                        underApproximation->setCurrentStateIsTarget();
+                        underApproximation->addSelfloopTransition();
                     } else {
                         bool stopExploration = false;
-                        if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
-                            stopExploration = true;
-                            explorer.setCurrentStateIsTruncated();
-                        } else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) {
-                            stopExploration = true;
-                            explorer.setCurrentStateIsTruncated();
+                        if (!underApproximation->currentStateHasOldBehavior()) {
+                            if (storm::utility::abs<ValueType>(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
+                                stopExploration = true;
+                                underApproximation->setCurrentStateIsTruncated();
+                            } else if (underApproximation->getCurrentNumberOfMdpStates() >= maxStateCount) {
+                                stopExploration = true;
+                                underApproximation->setCurrentStateIsTruncated();
+                            }
                         }
                         for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
-                            ValueType truncationProbability = storm::utility::zero<ValueType>();
-                            ValueType truncationValueBound = storm::utility::zero<ValueType>();
-                            auto successors = beliefManager->expand(currId, action);
-                            for (auto const& successor : successors) {
-                                bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
-                                if (!added) {
-                                    STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
-                                    // We did not explore this successor state. Get a bound on the "missing" value
-                                    truncationProbability += successor.second;
-                                    truncationValueBound += successor.second * (min ? explorer.computeUpperValueBoundAtBelief(successor.first) : explorer.computeLowerValueBoundAtBelief(successor.first));
+                            // Always restore old behavior if available
+                            if (underApproximation->currentStateHasOldBehavior()) {
+                                underApproximation->restoreOldBehaviorAtCurrentState(action);
+                            } else {
+                                ValueType truncationProbability = storm::utility::zero<ValueType>();
+                                ValueType truncationValueBound = storm::utility::zero<ValueType>();
+                                auto successors = beliefManager->expand(currId, action);
+                                for (auto const& successor : successors) {
+                                    bool added = underApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration);
+                                    if (!added) {
+                                        STORM_LOG_ASSERT(stopExploration, "Transition was not added although exploration is not stopped.");
+                                        // We did not explore this successor state. Get a bound on the "missing" value
+                                        truncationProbability += successor.second;
+                                        truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first));
+                                    }
+                                }
+                                if (stopExploration) {
+                                    if (computeRewards) {
+                                        underApproximation->addTransitionsToExtraStates(action, truncationProbability);
+                                    } else {
+                                        underApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
+                                    }
                                 }
-                            }
-                            if (stopExploration) {
                                 if (computeRewards) {
-                                    explorer.addTransitionsToExtraStates(action, truncationProbability);
-                                } else {
-                                    explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
+                                    // The truncationValueBound will be added on top of the reward introduced by the current belief state.
+                                    underApproximation->computeRewardAtCurrentState(action, truncationValueBound);
                                 }
                             }
-                            if (computeRewards) {
-                                // The truncationValueBound will be added on top of the reward introduced by the current belief state.
-                                explorer.computeRewardAtCurrentState(action, truncationValueBound);
-                            }
                         }
                     }
                     if (storm::utility::resources::isTerminate()) {
@@ -780,25 +484,19 @@ namespace storm {
                         break;
                     }
                 }
-                statistics.underApproximationStates = explorer.getCurrentNumberOfMdpStates();
+                statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates();
                 if (storm::utility::resources::isTerminate()) {
                     statistics.underApproximationBuildTime.stop();
-                    return nullptr;
+                    return;
                 }
                 
-                explorer.finishExploration();
+                underApproximation->finishExploration();
                 statistics.underApproximationBuildTime.stop();
 
                 statistics.underApproximationCheckTime.start();
-                explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
+                underApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                 statistics.underApproximationCheckTime.stop();
 
-                return std::make_shared<ExplorerType>(std::move(explorer));
-            }
-
-            template<typename PomdpModelType, typename BeliefValueType>
-            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
-                // TODO
             }
 
             template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>;
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 0d59ac31a..7fbd2ab5e 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -18,32 +18,6 @@ namespace storm {
         namespace modelchecker {
             typedef boost::bimap<uint64_t, uint64_t> bsmap_type;
 
-            /**
-             *  Struct containing information which is supposed to be persistent over multiple refinement steps
-             *
-             */
-            template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
-            struct RefinementComponents {
-                std::shared_ptr<storm::models::sparse::Model<ValueType, RewardModelType>> overApproxModelPtr;
-                ValueType overApproxValue;
-                ValueType underApproxValue;
-                std::map<uint64_t, ValueType> overApproxMap;
-                std::map<uint64_t, ValueType> underApproxMap;
-                std::vector<storm::pomdp::Belief<ValueType>> beliefList;
-                std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
-                std::vector<bool> beliefIsTarget;
-                bsmap_type overApproxBeliefStateMap;
-                bsmap_type underApproxBeliefStateMap;
-                uint64_t initialBeliefId;
-            };
-
-            template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
-            struct UnderApproxComponents {
-                ValueType underApproxValue;
-                std::map<uint64_t, ValueType> underApproxMap;
-                bsmap_type underApproxBeliefStateMap;
-            };
-
             template<typename PomdpModelType, typename BeliefValueType = typename PomdpModelType::ValueType>
             class ApproximatePOMDPModelchecker {
             public:
@@ -103,121 +77,16 @@ namespace storm {
                 /**
                  * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties
                  */
-                std::shared_ptr<ExplorerType> computeOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager);
-
-                void refineOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
+                void buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
 
                 /**
                  * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties
                  */
-                std::shared_ptr<ExplorerType> computeUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager);
-
-                void refineUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
-
-
-#ifdef REMOVE_THIS
-                /**
-                 * Helper to compute an underapproximation of the reachability property.
-                 * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states.
-                 *
-                 * @param beliefList vector containing already generated beliefs
-                 * @param beliefIsTarget vector containinf for each belief in beliefList true if the belief is a target
-                 * @param targetObservations set of target observations
-                 * @param initialBeliefId Id of the belief corresponding to the POMDP's initial state
-                 * @param min true if minimum value is to be computed
-                 * @param computeReward true if rewards are to be computed
-                 * @param maxModelSize number of states up until which the belief support should be unrolled
-                 * @return struct containing the components generated during the under approximation
-                 */
-                std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
-                                                                                                             std::vector<bool> &beliefIsTarget,
-                                                                                                             std::set<uint32_t> const &targetObservations,
-                                                                                                             uint64_t initialBeliefId, bool min, bool computeReward,
-                                                                                                             uint64_t maxModelSize);
-                std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
-                                                                                                             std::set<uint32_t> const &targetObservations, bool min, bool computeReward,
-                                                                                                             uint64_t maxModelSize, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds);
-
-                /**
-                 * Constructs the initial belief for the given POMDP
-                 *
-                 * @param pomdp the POMDP
-                 * @param id the id the initial belief is given
-                 * @return a belief representing the initial belief
-                 */
-                storm::pomdp::Belief<ValueType>
-                getInitialBelief(uint64_t id);
-
-
-                /**
-                 * Subroutine to compute the subsimplex a given belief is contained in and the corresponding lambda values necessary for the Freudenthal triangulation
-                 *
-                 * @param probabilities the probability distribution of the belief
-                 * @param gridResolution the resolution used for the belief
-                 * @param nrStates number of states in the POMDP
-                 * @return a pair containing: 1) the subsimplices 2) the lambda values
-                 */
-                std::pair<std::vector<std::map<uint64_t, ValueType>>, std::vector<ValueType>>
-                computeSubSimplexAndLambdas(std::map<uint64_t, ValueType> &probabilities, uint64_t gridResolution, uint64_t nrStates);
+                void buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
 
-
-                /**
-                 * Helper method to get the probabilities to be in a state with each observation after performing an action
-                 *
-                 * @param belief the belief in which the action is performed
-                 * @param actionIndex the index of the action to be performed
-                 * @return mapping from each observation to the probability to be in a state with that observation after performing the action
-                 */
-                std::map<uint32_t, ValueType> computeObservationProbabilitiesAfterAction(storm::pomdp::Belief<ValueType> &belief,
-                        uint64_t actionIndex);
-
-                /**
-                 * Helper method to get the id of the next belief that results from a belief by performing an action and observing an observation.
-                 * If the belief does not exist yet, it is created and added to the list of all beliefs
-                 *
-                 * @param beliefList data structure to store all generated beliefs
-                 * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief
-                 * @param targetObservations set of target observations
-                 * @param belief the starting belief
-                 * @param actionIndex the index of the action to be performed
-                 * @param observation the observation after the action was performed
-                 * @return the resulting belief (observation and distribution)
-                 */
-                uint64_t getBeliefAfterActionAndObservation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
-                        std::vector<bool> &beliefIsTarget,
-                        std::set<uint32_t> const &targetObservations,
-                        storm::pomdp::Belief<ValueType> &belief,
-                        uint64_t actionIndex, uint32_t observation, uint64_t id);
-
-                /**
-                 * Helper to get the id of a Belief stored in a given vector structure
-                 *
-                 * @param grid the vector on which the lookup is performed
-                 * @param observation the observation of the belief
-                 * @param probabilities the probability distribution over the POMDP states of the Belief
-                 * @return if the belief was found in the vector, the belief's ID, otherwise -1
-                 */
-                uint64_t getBeliefIdInVector(std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
-                                             std::map<uint64_t, ValueType> &probabilities);
-
-                /**
-                 * Helper method to build the transition matrix from a data structure containing transations
-                 *
-                 * @param transitions  data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability)
-                 * @return sparseMatrix representing the transitions
-                 */
-                storm::storage::SparseMatrix<ValueType> buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> &transitions);
-
-                /**
-                 * Get the reward for performing an action in a given belief
-                 *
-                 * @param action the index of the action to be performed
-                 * @param belief the belief in which the action is performed
-                 * @return the reward earned by performing the action in the belief
-                 */
-                ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief);
-                ValueType getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief);
-#endif //REMOVE_THIS
+                ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info);
+                
+                std::vector<ValueType> getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation);
                 
                 struct Statistics {
                     Statistics();

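When exploration is truncated at a belief, the explorer redirects the probability mass of the
unexplored successors to the extra target and bottom states so that each row remains a
probability distribution. A minimal numeric sketch of that bookkeeping (values invented for
illustration; this is not the storm code):

    #include <iostream>

    int main() {
        // Exploration is cut off at a belief with two unexplored successors.
        double p1 = 0.4, v1 = 0.5;   // successor probability and its value bound
        double p2 = 0.1, v2 = 0.2;
        double truncationProbability = p1 + p2;           // unexplored mass: 0.5
        double truncationValueBound = p1 * v1 + p2 * v2;  // bound on the "missing" value: 0.22
        // Probability case: the bounded value mass goes to the extra target state,
        // the remainder to the extra bottom state, keeping the row a distribution.
        std::cout << "to target: " << truncationValueBound
                  << ", to bottom: " << (truncationProbability - truncationValueBound) << '\n';
    }
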
From 6540b486e732a7921d08852ce9f7c3450e944729 Mon Sep 17 00:00:00 2001
From: Matthias Volk <matthias.volk@cs.rwth-aachen.de>
Date: Mon, 6 Apr 2020 14:31:47 +0200
Subject: [PATCH 23/40] NotSupportedException when using drn export for
 symbolic models

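Requesting an export format that does not match the model representation now fails fast
with a NotSupportedException instead of being silently ignored. A minimal sketch of the
guard pattern with simplified stand-in types (this is not the storm API):

    #include <iostream>
    #include <stdexcept>

    struct IOSettings { bool exportExplicitSet; bool exportDdSet; };

    // A symbolic (DD) model cannot be exported in the explicit drn format.
    void exportDdModel(IOSettings const& io) {
        if (io.exportExplicitSet) {
            throw std::logic_error("Exporting in drn format is only supported for sparse models.");
        }
        // ... perform the drdd export here ...
    }

    int main() {
        try {
            exportDdModel(IOSettings{true, false});
        } catch (std::exception const& e) {
            std::cout << e.what() << '\n';
        }
    }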
---
 src/storm-cli-utilities/model-handling.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/storm-cli-utilities/model-handling.h b/src/storm-cli-utilities/model-handling.h
index 6dad6fbb0..e6508c190 100644
--- a/src/storm-cli-utilities/model-handling.h
+++ b/src/storm-cli-utilities/model-handling.h
@@ -566,6 +566,10 @@ namespace storm {
             if (ioSettings.isExportExplicitSet()) {
                 storm::api::exportSparseModelAsDrn(model, ioSettings.getExportExplicitFilename(), input.model ? input.model.get().getParameterNames() : std::vector<std::string>());
             }
+
+            if (ioSettings.isExportDdSet()) {
+                STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Exporting in drdd format is only supported for DDs.");
+            }
             
             if (ioSettings.isExportDotSet()) {
                 storm::api::exportSparseModelAsDot(model, ioSettings.getExportDotFilename(), ioSettings.getExportDotMaxWidth());
@@ -576,6 +580,10 @@ namespace storm {
         void exportDdModel(std::shared_ptr<storm::models::symbolic::Model<DdType, ValueType>> const& model, SymbolicInput const& input) {
             auto ioSettings = storm::settings::getModule<storm::settings::modules::IOSettings>();
 
+            if (ioSettings.isExportExplicitSet()) {
+                STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Exporting in drn format is only supported for sparse models.");
+            }
+
             if (ioSettings.isExportDdSet()) {
                 storm::api::exportSparseModelAsDrdd(model, ioSettings.getExportDdFilename());
             }

From c3847d05afd0bce9aae4d80047a1a1543846f263 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 7 Apr 2020 06:37:01 +0200
Subject: [PATCH 24/40] Scaling the rating of an observation with the current
 resolution.

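The rating of an observation is now additionally scaled by its current resolution relative
to the maximal one, so that coarsely resolved observations get lower ratings and are refined
first. An illustrative recomputation with made-up numbers (not the storm code):

    #include <iostream>

    // Rating as in rateObservation: 0 = bad approximation, 1 = precise approximation.
    double rate(double maxWeight, double obsProbability, double n,
                double observationResolution, double maxResolution) {
        double r = maxWeight / obsProbability;  // largest triangulation weight, in [1/n, 1]
        r = (r * n - 1.0) / (n - 1.0);          // normalize to [0, 1]
        // Scale with the relative resolution (the change in this patch).
        return r * (observationResolution / maxResolution);
    }

    int main() {
        // Worst case for n = 2 (weights 0.5/0.5) at half the maximal resolution: rating 0.
        std::cout << rate(0.5, 1.0, 2.0, 6.0, 12.0) << '\n';
        // A precisely hit grid point (weight 1) at full resolution: rating 1.
        std::cout << rate(1.0, 1.0, 2.0, 12.0, 12.0) << '\n';
    }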
---
 .../ApproximatePOMDPModelchecker.cpp           | 18 +++++++++++-------
 .../ApproximatePOMDPModelchecker.h             |  4 ++--
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 4526607d4..2936d9b40 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -264,21 +264,23 @@ namespace storm {
              * Here, 0 means a bad approximation and 1 means a good approximation.
              */
             template<typename PomdpModelType, typename BeliefValueType>
-            typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info) {
+            typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) {
                 auto n = storm::utility::convertNumber<ValueType>(info.successorWithObsCount);
                 auto one = storm::utility::one<ValueType>();
                 
-                // Create the actual rating for this observation at this choice from the given info
+                // Create the rating for this observation at this choice from the given info
                 ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
                // At this point, obsChoiceRating is the largest triangulation weight (which ranges from 1/n to 1).
                 // Normalize the rating so that it ranges from 0 to 1, where
                 // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
                 obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
+                // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
+                obsChoiceRating *= storm::utility::convertNumber<ValueType>(observationResolution) / storm::utility::convertNumber<ValueType>(maxResolution);
                 return obsChoiceRating;
             }
             
             template<typename PomdpModelType, typename BeliefValueType>
-            std::vector<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation) {
+            std::vector<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<uint64_t> const& observationResolutionVector, uint64_t const& maxResolution) {
                 uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices();
 
                 std::vector<ValueType> resultingRatings(pomdp.getNrObservations(), storm::utility::one<ValueType>());
@@ -289,7 +291,7 @@ namespace storm {
                     overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
                     for (auto const& obsInfo : gatheredSuccessorObservations) {
                         auto const& obs = obsInfo.first;
-                        ValueType obsChoiceRating = rateObservation(obsInfo.second);
+                        ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
  
                         // The rating of the observation will be the minimum over all choice-based observation ratings
                         resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
@@ -303,7 +305,9 @@ namespace storm {
                 STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given");
                STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero<ValueType>(), "Cannot refine with negative aggressiveness.");
                 STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one<ValueType>(), "Refinement with aggressiveness > 1 is invalid.");
-                
+                uint64_t maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
+                STORM_LOG_INFO("Refining with maximal resolution " << maxResolution << ".");
+
                 statistics.overApproximationBuildTime.start();
                 storm::storage::BitVector refinedObservations;
                 if (!refine) {
@@ -315,7 +319,7 @@ namespace storm {
                     }
                 } else {
                     // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement.
-                    auto obsRatings = getObservationRatings(overApproximation);
+                    auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, maxResolution);
                     ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end());
                     // Potentially increase the aggressiveness so that at least one observation actually gets refinement.
                     *refinementAggressiveness = std::max(minRating, *refinementAggressiveness);
@@ -356,7 +360,7 @@ namespace storm {
                                 overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations);
                                 for (auto const& obsInfo : gatheredSuccessorObservations) {
                                     if (refinedObservations.get(obsInfo.first)) {
-                                        ValueType obsRating = rateObservation(obsInfo.second);
+                                        ValueType obsRating = rateObservation(obsInfo.second, observationResolutionVector[obsInfo.first], maxResolution);
                                         stateActionRating = std::min(stateActionRating, obsRating);
                                     }
                                 }
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 7fbd2ab5e..f895a3138 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -84,9 +84,9 @@ namespace storm {
                  */
                 void buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
 
-                ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info);
+                ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution);
                 
-                std::vector<ValueType> getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation);
+                std::vector<ValueType> getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<uint64_t> const& observationResolutionVector, uint64_t const& maxResolution);
                 
                 struct Statistics {
                     Statistics();

From c2837bb749668df1cbadf995062d84520495c9e3 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 7 Apr 2020 12:41:36 +0200
Subject: [PATCH 25/40] ApproximatePOMDPModelchecker: Improved output a bit.

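Besides the extra log output, the loop now only rebuilds the under-approximation if the
precision goal is not already met after refining the over-approximation. The resulting
loop shape, as a simplified sketch with invented numbers (not the storm code):

    #include <iostream>

    int main() {
        double lower = 0.2, upper = 0.8, precision = 0.05, aggressiveness = 0.2;
        unsigned long long sizeThreshold = 1000;  // exploration budget for the under-approximation
        unsigned steps = 0;
        while (upper - lower > precision) {
            ++steps;
            std::cout << "Starting refinement step " << steps
                      << ". Current difference is " << (upper - lower) << ".\n";
            upper -= 0.1;  // stand-in for refining the over-approximation
            if (upper - lower > precision) {
                // Grow the budget by (1 + aggressiveness) before refining the under-approximation.
                sizeThreshold = static_cast<unsigned long long>(sizeThreshold * (1.0 + aggressiveness));
                lower += 0.1;  // stand-in for refining the under-approximation
            }
        }
    }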
---
 .../ApproximatePOMDPModelchecker.cpp          | 44 ++++++++++++-------
 .../ApproximatePOMDPModelchecker.h            |  2 +
 2 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 2936d9b40..fb95b0838 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -133,6 +133,7 @@ namespace storm {
                         stream << ">=";
                     }
                     stream << statistics.overApproximationStates.get() << std::endl;
+                    stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.get() << std::endl;
                     stream << "# Time spend for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << std::endl;
                     stream << "# Time spend for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << std::endl;
                 }
@@ -148,6 +149,7 @@ namespace storm {
                         stream << ">=";
                     }
                     stream << statistics.underApproximationStates.get() << std::endl;
+                    stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.get() << std::endl;
                     stream << "# Time spend for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << std::endl;
                     stream << "# Time spend for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << std::endl;
                 }
@@ -231,14 +233,16 @@ namespace storm {
                 // ValueType lastMinScore = storm::utility::infinity<ValueType>();
                 // Start refinement
                 statistics.refinementSteps = 0;
-                ValueType refinementAggressiveness = storm::utility::zero<ValueType>();
+                ValueType refinementAggressiveness = storm::utility::convertNumber<ValueType>(0.0);
                 while (result.diff() > options.refinementPrecision) {
                     if (storm::utility::resources::isTerminate()) {
                         break;
                     }
+                    ++statistics.refinementSteps.get();
+                    STORM_LOG_INFO("Starting refinement step " << statistics.refinementSteps.get() << ". Current difference between lower and upper bound is " << result.diff() << ".");
                     
                     // Refine over-approximation
-                    refinementAggressiveness *= storm::utility::convertNumber<ValueType>(1.1);;
+                    STORM_LOG_DEBUG("Refining over-approximation with aggressiveness " << refinementAggressiveness << ".");
                     buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, &refinementAggressiveness, observationResolutionVector, overApproxBeliefManager, overApproximation);
                     if (overApproximation->hasComputedValues()) {
                         overApproxValue = overApproximation->getComputedValueAtInitialState();
@@ -246,16 +250,18 @@ namespace storm {
                         break;
                     }
                     
-                    // Refine under-approximation
-                    underApproxSizeThreshold *= storm::utility::convertNumber<uint64_t, ValueType>(storm::utility::convertNumber<ValueType>(underApproxSizeThreshold) * (storm::utility::one<ValueType>() + refinementAggressiveness));
-                    underApproxSizeThreshold = std::max<uint64_t>(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates());
-                    buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
-                    if (underApproximation->hasComputedValues()) {
-                        underApproxValue = underApproximation->getComputedValueAtInitialState();
-                    } else {
-                        break;
+                    if (result.diff() > options.refinementPrecision) {
+                        // Refine under-approximation
+                        underApproxSizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(storm::utility::convertNumber<ValueType>(underApproxSizeThreshold) * (storm::utility::one<ValueType>() + refinementAggressiveness));
+                        underApproxSizeThreshold = std::max<uint64_t>(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates());
+                        STORM_LOG_DEBUG("Refining under-approximation with size threshold " << underApproxSizeThreshold << ".");
+                        buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
+                        if (underApproximation->hasComputedValues()) {
+                            underApproxValue = underApproximation->getComputedValueAtInitialState();
+                        } else {
+                            break;
+                        }
                     }
-                    ++statistics.refinementSteps.get();
                 }
             }
 
@@ -305,8 +311,9 @@ namespace storm {
                 STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given");
                STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero<ValueType>(), "Cannot refine with negative aggressiveness.");
                 STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one<ValueType>(), "Refinement with aggressiveness > 1 is invalid.");
-                uint64_t maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
-                STORM_LOG_INFO("Refining with maximal resolution " << maxResolution << ".");
+                
+                // Current maximal resolution (needed for the refinement heuristic)
+                uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
 
                 statistics.overApproximationBuildTime.start();
                 storm::storage::BitVector refinedObservations;
@@ -319,12 +326,12 @@ namespace storm {
                     }
                 } else {
                     // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement.
-                    auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, maxResolution);
+                    auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution);
                     ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end());
                     // Potentially increase the aggressiveness so that at least one observation actually gets refinement.
                     *refinementAggressiveness = std::max(minRating, *refinementAggressiveness);
                     refinedObservations = storm::utility::vector::filter<ValueType>(obsRatings, [&refinementAggressiveness](ValueType const& r) { return r <= *refinementAggressiveness;});
-                    STORM_PRINT("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
+                    STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
                     for (auto const& obs : refinedObservations) {
                         // Heuristically increment the resolution at the refined observations (also based on the refinementAggressiveness)
                         ValueType incrementValue = storm::utility::one<ValueType>() + (*refinementAggressiveness) * storm::utility::convertNumber<ValueType>(observationResolutionVector[obs]);
@@ -332,6 +339,7 @@ namespace storm {
                     }
                     overApproximation->restartExploration();
                 }
+                statistics.overApproximationMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
                 
                 // Start exploration
                 std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
@@ -360,7 +368,7 @@ namespace storm {
                                 overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations);
                                 for (auto const& obsInfo : gatheredSuccessorObservations) {
                                     if (refinedObservations.get(obsInfo.first)) {
-                                        ValueType obsRating = rateObservation(obsInfo.second, observationResolutionVector[obsInfo.first], maxResolution);
+                                        ValueType obsRating = rateObservation(obsInfo.second, observationResolutionVector[obsInfo.first], oldMaxResolution);
                                         stateActionRating = std::min(stateActionRating, obsRating);
                                     }
                                 }
@@ -411,7 +419,8 @@ namespace storm {
                 
                 overApproximation->finishExploration();
                 statistics.overApproximationBuildTime.stop();
-
+                STORM_LOG_DEBUG("Explored " << statistics.overApproximationStates.get() << " states.");
+                
                 statistics.overApproximationCheckTime.start();
                 overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                 statistics.overApproximationCheckTime.stop();
@@ -421,6 +430,7 @@ namespace storm {
             void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
                 
                 statistics.underApproximationBuildTime.start();
+                statistics.underApproximationStateLimit = maxStateCount;
                 if (!underApproximation->hasComputedValues()) {
                     // Build a new under approximation
                     if (computeRewards) {
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index f895a3138..1d5521b6a 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -96,11 +96,13 @@ namespace storm {
                     bool overApproximationBuildAborted;
                     storm::utility::Stopwatch overApproximationBuildTime;
                     storm::utility::Stopwatch overApproximationCheckTime;
+                    boost::optional<uint64_t> overApproximationMaxResolution;
                     
                     boost::optional<uint64_t> underApproximationStates;
                     bool underApproximationBuildAborted;
                     storm::utility::Stopwatch underApproximationBuildTime;
                     storm::utility::Stopwatch underApproximationCheckTime;
+                    boost::optional<uint64_t> underApproximationStateLimit;
                     
                     bool aborted;
                 };

From 961baa43868315b5c8eaef8c21eeffd962748ffb Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 7 Apr 2020 12:43:33 +0200
Subject: [PATCH 26/40] BeliefMdpExplorer: Various bugfixes for exploration
 restarts. Unexplored (= unreachable) states are now dropped before building
 the MDP since we do not get a valid MDP otherwise.

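The central fix concerns the row-group bookkeeping: the index marking the start of a group
is now recorded when the group is opened, and the next marker is appended once the state is
fully explored. A simplified illustration of the resulting index layout (not the storm code):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
        std::vector<uint64_t> rowGroupIndices{0};  // start of the first row group
        uint64_t rows = 0;
        auto addRow = [&]() { ++rows; };
        auto closeGroup = [&]() { rowGroupIndices.push_back(rows); };

        addRow(); closeGroup();            // extra bottom state: one self-loop row
        addRow(); closeGroup();            // extra target state: one self-loop row
        addRow(); addRow(); closeGroup();  // a belief state with two actions

        for (auto i : rowGroupIndices) std::cout << i << ' ';  // prints: 0 1 2 4
        std::cout << '\n';
    }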
---
 src/storm-pomdp/builder/BeliefMdpExplorer.h | 186 ++++++++++++++------
 1 file changed, 131 insertions(+), 55 deletions(-)

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index 2a97c5e05..d59b770f0 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -62,33 +62,36 @@ namespace storm {
                 exploredChoiceIndices.clear();
                 mdpActionRewards.clear();
                 exploredMdp = nullptr;
-                currentMdpState = noState();
-                
+                internalAddRowGroupIndex(); // Mark the start of the first row group
+
                 // Add some states with special treatment (if requested)
                 if (extraBottomStateValue) {
-                    extraBottomState = getCurrentNumberOfMdpStates();
+                    currentMdpState = getCurrentNumberOfMdpStates();
+                    extraBottomState = currentMdpState;
                     mdpStateToBeliefIdMap.push_back(beliefManager->noId());
                     insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get());
 
-                    internalAddRowGroupIndex();
                     internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one<ValueType>());
+                    internalAddRowGroupIndex();
                 } else {
                     extraBottomState = boost::none;
                 }
                 if (extraTargetStateValue) {
-                    extraTargetState = getCurrentNumberOfMdpStates();
+                    currentMdpState = getCurrentNumberOfMdpStates();
+                    extraTargetState = currentMdpState;
                     mdpStateToBeliefIdMap.push_back(beliefManager->noId());
                     insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get());
                     
-                    internalAddRowGroupIndex();
                     internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one<ValueType>());
+                    internalAddRowGroupIndex();
                     
                     targetStates.grow(getCurrentNumberOfMdpStates(), false);
                     targetStates.set(extraTargetState.get(), true);
                 } else {
                     extraTargetState = boost::none;
                 }
-                
+                currentMdpState = noState();
+
                 // Set up the initial state.
                 initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
             }
@@ -101,6 +104,7 @@ namespace storm {
              */
             void restartExploration() {
                 STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status.");
+                status = Status::Exploring;
                  // We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid.
                 exploredBeliefIds.clear();
                 exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false);
@@ -124,6 +128,7 @@ namespace storm {
                 if (extraTargetState) {
                     currentMdpState = extraTargetState.get();
                     restoreOldBehaviorAtCurrentState(0);
+                    targetStates.set(extraTargetState.get(), true);
                 }
                 currentMdpState = noState();
                 
@@ -138,23 +143,22 @@ namespace storm {
     
             BeliefId exploreNextState() {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                // Mark the end of the previously explored row group.
+                if (currentMdpState != noState() && !currentStateHasOldBehavior()) {
+                    internalAddRowGroupIndex();
+                }
                 
                 // Pop from the queue.
                 currentMdpState = mdpStatesToExplore.front();
                 mdpStatesToExplore.pop_front();
                 
-                if (!currentStateHasOldBehavior()) {
-                    internalAddRowGroupIndex();
-                }
                 
                 return mdpStateToBeliefIdMap[currentMdpState];
             }
             
             void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero<ValueType>(), ValueType const& bottomStateValue = storm::utility::zero<ValueType>()) {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
-                // We first insert the entries of the current row in a separate map.
-                // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
-                
+                STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP.");
                 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
                 if (!storm::utility::isZero(bottomStateValue)) {
                     STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none.");
@@ -168,6 +172,7 @@ namespace storm {
             
             void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one<ValueType>()) {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP.");
                 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
                 internalAddTransition(row, getCurrentMdpState(), value);
             }
@@ -182,8 +187,8 @@ namespace storm {
              */
             bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
-                // We first insert the entries of the current row in a separate map.
-                // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
+                STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP.");
+
                 MdpStateType column;
                 if (ignoreNewBeliefs) {
                     column = getExploredMdpState(transitionTarget);
@@ -221,6 +226,7 @@ namespace storm {
             
             bool currentStateHasOldBehavior() {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateHasOldBehavior' called but there is no current state.");
                 return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates();
             }
             
@@ -232,6 +238,8 @@ namespace storm {
              */
             void restoreOldBehaviorAtCurrentState(uint64_t const& localActionIndex) {
                 STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any.");
+                STORM_LOG_ASSERT(localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP.");
+
                 uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex;
                 STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index.");
                 
@@ -255,10 +263,27 @@ namespace storm {
             void finishExploration() {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
                 STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states.");
+                
+                // Complete the exploration
                 // Finish the last row grouping in case the last explored state was new
                 if (!currentStateHasOldBehavior()) {
                     internalAddRowGroupIndex();
                 }
+                // Resize state- and choice-based vectors to the correct size
+                targetStates.resize(getCurrentNumberOfMdpStates(), false);
+                truncatedStates.resize(getCurrentNumberOfMdpStates(), false);
+                if (!mdpActionRewards.empty()) {
+                    mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
+                }
+
+                // We are not exploring anymore
+                currentMdpState = noState();
+                
+                // If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build).
+                // We get rid of these before rebuilding the model
+                if (exploredMdp) {
+                    dropUnexploredStates();
+                }
                 
                 // Create the transition matrix
                 uint64_t entryCount = 0;
@@ -300,50 +325,101 @@ namespace storm {
                 status = Status::ModelFinished;
             }
 
-            void dropUnreachableStates() {
-                STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status.");
-                auto reachableStates = storm::utility::graph::getReachableStates(getExploredMdp()->getTransitionMatrix(),
-                                                                                 storm::storage::BitVector(getCurrentNumberOfMdpStates(), std::vector<uint64_t>{initialMdpState}),
-                                                                                 storm::storage::BitVector(getCurrentNumberOfMdpStates(), true),
-                                                                                 getExploredMdp()->getStateLabeling().getStates("target"));
-                auto reachableTransitionMatrix = getExploredMdp()->getTransitionMatrix().getSubmatrix(true, reachableStates, reachableStates);
-                auto reachableStateLabeling = getExploredMdp()->getStateLabeling().getSubLabeling(reachableStates);
-                std::vector<BeliefId> reachableMdpStateToBeliefIdMap(reachableStates.getNumberOfSetBits());
-                std::vector<ValueType> reachableLowerValueBounds(reachableStates.getNumberOfSetBits());
-                std::vector<ValueType> reachableUpperValueBounds(reachableStates.getNumberOfSetBits());
-                std::vector<ValueType> reachableValues(reachableStates.getNumberOfSetBits());
-                std::vector<ValueType> reachableMdpActionRewards;
-                for (uint64_t state = 0; state < reachableStates.size(); ++state) {
-                    if (reachableStates[state]) {
-                        reachableMdpStateToBeliefIdMap.push_back(mdpStateToBeliefIdMap[state]);
-                        reachableLowerValueBounds.push_back(lowerValueBounds[state]);
-                        reachableUpperValueBounds.push_back(upperValueBounds[state]);
-                        reachableValues.push_back(values[state]);
-                        if (getExploredMdp()->hasRewardModel()) {
-                            //TODO FIXME is there some mismatch with the indices here?
-                            for (uint64_t i = 0; i < getExploredMdp()->getTransitionMatrix().getRowGroupSize(state); ++i) {
-                                reachableMdpActionRewards.push_back(getExploredMdp()->getUniqueRewardModel().getStateActionRewardVector()[state + i]);
-                            }
+            void dropUnexploredStates() {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                STORM_LOG_ASSERT(!hasUnexploredState(), "Dropping unexplored states is only possible if the exploration queue is empty.");
+                
+                STORM_LOG_ASSERT(exploredMdp, "Method called although no 'old' MDP is available.");
+                // Find the states (and corresponding choices) that were not explored.
+                // These correspond to "empty" MDP transitions
+                storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(), true), relevantMdpChoices(getCurrentNumberOfMdpChoices(), true);
+                std::vector<MdpStateType> toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState());
+                MdpStateType nextRelevantIndex = 0;
+                for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
+                    uint64_t rowIndex = exploredChoiceIndices[groupIndex];
+                    // Check first row in group
+                    if (exploredMdpTransitions[rowIndex].empty()) {
+                        relevantMdpChoices.set(rowIndex, false);
+                        relevantMdpStates.set(groupIndex, false);
+                    } else {
+                        toRelevantStateIndexMap[groupIndex] = nextRelevantIndex;
+                        ++nextRelevantIndex;
+                    }
+                    uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
+                    // process remaining rows in group
+                    for (++rowIndex; rowIndex < groupEnd; ++rowIndex) {
+                        // Assert that all actions at the current state were consistently explored or unexplored.
+                        STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex), "Actions at 'old' MDP state " << groupIndex << " were only partly explored.");
+                        if (exploredMdpTransitions[rowIndex].empty()) {
+                            relevantMdpChoices.set(rowIndex, false);
                         }
                     }
-                    //TODO drop BeliefIds from exploredBeliefIDs?
                 }
-                std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels;
-                if (!reachableMdpActionRewards.empty()) {
-                    //reachableMdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
-                    mdpRewardModels.emplace("default",
-                                            storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(reachableMdpActionRewards)));
+                
+                if (relevantMdpStates.full()) {
+                    // All states are relevant so nothing to do
+                    return;
                 }
-                storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(reachableTransitionMatrix), std::move(reachableStateLabeling),
-                                                                                   std::move(mdpRewardModels));
-                exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
-
-                std::map<BeliefId, MdpStateType> reachableBeliefIdToMdpStateMap;
-                for (MdpStateType state = 0; state < reachableMdpStateToBeliefIdMap.size(); ++state) {
-                    reachableBeliefIdToMdpStateMap[reachableMdpStateToBeliefIdMap[state]] = state;
+                
+                // Translate various components to the "new" MDP state set
+                storm::utility::vector::filterVectorInPlace(mdpStateToBeliefIdMap, relevantMdpStates);
+                { // beliefIdToMdpStateMap
+                    for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) {
+                        if (relevantMdpStates.get(belIdToMdpStateIt->second)) {
+                            // Keep current entry and move on to the next one.
+                            ++belIdToMdpStateIt;
+                        } else {
+                            STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first), "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId");
+                            // Delete current entry and move on to the next one.
+                            // This works because std::map::erase does not invalidate other iterators within the map!
+                            beliefIdToMdpStateMap.erase(belIdToMdpStateIt++);
+                        }
+                    }
+                }
+                { // exploredMdpTransitions
+                    storm::utility::vector::filterVectorInPlace(exploredMdpTransitions, relevantMdpChoices);
+                    // Adjust column indices. Unfortunately, the fastest way seems to be to "rebuild" the map
+                    // It might pay off to do this when building the matrix.
+                    for (auto& transitions : exploredMdpTransitions) {
+                        std::map<MdpStateType, ValueType> newTransitions;
+                        for (auto const& entry : transitions) {
+                            STORM_LOG_ASSERT(relevantMdpStates.get(entry.first), "Relevant state has transition to irrelevant state.");
+                            newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second);
+                        }
+                        transitions = std::move(newTransitions);
+                    }
                 }
-                mdpStateToBeliefIdMap = reachableMdpStateToBeliefIdMap;
-                beliefIdToMdpStateMap = reachableBeliefIdToMdpStateMap;
+                { // exploredChoiceIndices
+                    MdpStateType newState = 0;
+                    assert(exploredChoiceIndices[0] == 0u);
+                    // Loop invariant: all entries up to exploredChoiceIndices[newState] already refer to the new row indices; all later entries are still untouched.
+                    for (auto const& oldState : relevantMdpStates) {
+                        if (oldState != newState) {
+                            assert(oldState > newState);
+                            uint64_t groupSize = exploredChoiceIndices[oldState + 1] - exploredChoiceIndices[oldState];
+                            exploredChoiceIndices[newState + 1] = exploredChoiceIndices[newState] + groupSize;
+                        }
+                        ++newState;
+                    }
+                    exploredChoiceIndices.resize(newState + 1);
+                }
+                if (!mdpActionRewards.empty()) {
+                    storm::utility::vector::filterVectorInPlace(mdpActionRewards, relevantMdpChoices);
+                }
+                if (extraBottomState) {
+                    extraBottomState = toRelevantStateIndexMap[extraBottomState.get()];
+                }
+                if (extraTargetState) {
+                    extraTargetState = toRelevantStateIndexMap[extraTargetState.get()];
+                }
+                targetStates = targetStates % relevantMdpStates;
+                truncatedStates = truncatedStates % relevantMdpStates;
+                initialMdpState = toRelevantStateIndexMap[initialMdpState];
+                
+                storm::utility::vector::filterVectorInPlace(lowerValueBounds, relevantMdpStates);
+                storm::utility::vector::filterVectorInPlace(upperValueBounds, relevantMdpStates);
+                storm::utility::vector::filterVectorInPlace(values, relevantMdpStates);
+
             }
 
             std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const {
@@ -364,7 +440,7 @@ namespace storm {
     
             MdpStateType getStartOfCurrentRowGroup() const {
                 STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
-                return exploredChoiceIndices.back();
+                return exploredChoiceIndices[getCurrentMdpState()];
             }
 
             ValueType getLowerValueBoundAtCurrentState() const {

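The exploredChoiceIndices block above compacts a row-group offset vector in place. As a standalone illustration, here is a minimal sketch of the same compaction with hypothetical names (plain std::vector<bool> standing in for storm's BitVector); it is not code from the patch:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Compacts a row-group offset vector (size = numGroups + 1, offsets[0] == 0)
    // so that only the groups flagged in 'keep' remain; group sizes are preserved.
    // Writing offsets[newGroup + 1] is safe because newGroup never exceeds the
    // old group index, mirroring the loop invariant in the patch.
    std::vector<uint64_t> compactOffsets(std::vector<uint64_t> offsets, std::vector<bool> const& keep) {
        assert(!offsets.empty() && offsets.front() == 0 && offsets.size() == keep.size() + 1);
        uint64_t newGroup = 0;
        for (uint64_t oldGroup = 0; oldGroup < keep.size(); ++oldGroup) {
            if (!keep[oldGroup]) {
                continue; // dropped group: its rows simply vanish
            }
            uint64_t groupSize = offsets[oldGroup + 1] - offsets[oldGroup];
            offsets[newGroup + 1] = offsets[newGroup] + groupSize;
            ++newGroup;
        }
        offsets.resize(newGroup + 1);
        return offsets;
    }

    // Example: offsets {0,2,3,5} with keep {true,false,true} yields {0,2,4}.
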
From 34d6ac9fe1f0e06ab209a51d91ba1adae0aaa3b0 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 7 Apr 2020 12:44:14 +0200
Subject: [PATCH 27/40] Fixed computing a state limit for the
 under-approximation.

---
 src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index fb95b0838..bea230599 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -252,7 +252,7 @@ namespace storm {
                     
                     if (result.diff() > options.refinementPrecision) {
                         // Refine under-approximation
-                        underApproxSizeThreshold *= storm::utility::convertNumber<uint64_t, ValueType>(storm::utility::convertNumber<ValueType>(underApproxSizeThreshold) * (storm::utility::one<ValueType>() + refinementAggressiveness));
+                        underApproxSizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(storm::utility::convertNumber<ValueType>(underApproxSizeThreshold) * (storm::utility::one<ValueType>() + refinementAggressiveness));
                         underApproxSizeThreshold = std::max<uint64_t>(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates());
                         STORM_LOG_DEBUG("Refining under-approximation with size threshold " << underApproxSizeThreshold << ".");
                         buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);

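The one-character change matters: with '*=', the threshold was multiplied by its own scaled value and therefore grew roughly quadratically per refinement step. A minimal sketch of the intended update, in a hypothetical standalone form with a double-valued aggressiveness factor:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Grow the under-approximation size limit geometrically by (1 + aggressiveness)
    // per refinement step, but never let it fall below the size of the current
    // over-approximation MDP.
    uint64_t nextSizeThreshold(uint64_t threshold, double aggressiveness, uint64_t overApproxStates) {
        threshold = static_cast<uint64_t>(std::llround(threshold * (1.0 + aggressiveness)));
        return std::max(threshold, overApproxStates);
    }
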
From 5cd4281133b986360c7f5583516220c2ab117c3a Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 7 Apr 2020 12:50:12 +0200
Subject: [PATCH 28/40] Further output improvements.

---
 src/storm-pomdp/builder/BeliefMdpExplorer.h                   | 4 +++-
 src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index d59b770f0..c8f1775ed 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -278,7 +278,7 @@ namespace storm {
 
                 // We are not exploring anymore
                 currentMdpState = noState();
-                
+
                 // If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build).
                 // We get rid of these before rebuilding the model
                 if (exploredMdp) {
@@ -323,6 +323,8 @@ namespace storm {
                 storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels));
                 exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
                 status = Status::ModelFinished;
+                STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << "of which were flagged as truncated).");
+
             }
 
             void dropUnexploredStates() {
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index bea230599..eb116fc7b 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -419,7 +419,6 @@ namespace storm {
                 
                 overApproximation->finishExploration();
                 statistics.overApproximationBuildTime.stop();
-                STORM_LOG_DEBUG("Explored " << statistics.overApproximationStates.get() << " states.");
                 
                 statistics.overApproximationCheckTime.start();
                 overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);

From 26864067cf7930078f54eed972096f7db5bd20b8 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 7 Apr 2020 13:04:38 +0200
Subject: [PATCH 29/40] BeliefManager: Made several methods private to hide the
 actual BeliefType.

---
 src/storm-pomdp/storage/BeliefManager.h | 164 +++++++++++++-----------
 1 file changed, 86 insertions(+), 78 deletions(-)

diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h
index 8f0dcd225..0731aef35 100644
--- a/src/storm-pomdp/storage/BeliefManager.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@@ -44,6 +44,92 @@ namespace storm {
                 }
             };
             
+            BeliefId noId() const {
+                return std::numeric_limits<BeliefId>::max();
+            }
+            
+            bool isEqual(BeliefId const& first, BeliefId const& second) const {
+                return isEqual(getBelief(first), getBelief(second));
+            }
+
+            std::string toString(BeliefId const& beliefId) const {
+                return toString(getBelief(beliefId));
+            }
+
+            
+            std::string toString(Triangulation const& t) const {
+                std::stringstream str;
+                str << "(\n";
+                for (uint64_t i = 0; i < t.size(); ++i) {
+                    str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n";
+                }
+                str <<")\n";
+                return str.str();
+            }
+            
+            template <typename SummandsType>
+            ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) {
+                ValueType result = storm::utility::zero<ValueType>();
+                for (auto const& entry : getBelief(beliefId)) {
+                    result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
+                }
+                return result;
+            }
+            
+            
+            BeliefId const& getInitialBelief() const {
+                return initialBeliefId;
+            }
+            
+            ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const {
+                auto const& belief = getBelief(beliefId);
+                STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified.");
+                auto result = storm::utility::zero<ValueType>();
+                auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
+                for (auto const &entry : belief) {
+                    uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex;
+                    STORM_LOG_ASSERT(choiceIndex < choiceIndices[entry.first + 1], "Invalid local action index.");
+                    STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index.");
+                    result += entry.second * pomdpActionRewardVector[choiceIndex];
+                }
+                return result;
+            }
+            
+            uint32_t getBeliefObservation(BeliefId beliefId) {
+                return getBeliefObservation(getBelief(beliefId));
+            }
+            
+            uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
+                auto belief = getBelief(beliefId);
+                return pomdp.getNumberOfChoices(belief.begin()->first);
+            }
+            
+            Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) {
+                return triangulateBelief(getBelief(beliefId), resolution);
+            }
+            
+            template<typename DistributionType>
+            void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) {
+                auto insertionRes = distr.emplace(state, value);
+                if (!insertionRes.second) {
+                    insertionRes.first->second += value;
+                }
+            }
+            
+            BeliefId getNumberOfBeliefIds() const {
+                return beliefs.size();
+            }
+            
+            std::map<BeliefId, ValueType> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
+                return expandInternal(beliefId, actionIndex, observationResolutions);
+            }
+            
+            std::map<BeliefId, ValueType> expand(BeliefId const& beliefId, uint64_t actionIndex) {
+                return expandInternal(beliefId, actionIndex);
+            }
+            
+        private:
+            
             BeliefType const& getBelief(BeliefId const& id) const {
                 STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existend belief.");
                 STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range.");
@@ -56,10 +142,6 @@ namespace storm {
                 return idIt->second;
             }
             
-            BeliefId noId() const {
-                return std::numeric_limits<BeliefId>::max();
-            }
-            
             std::string toString(BeliefType const& belief) const {
                 std::stringstream str;
                 str << "{ ";
@@ -76,16 +158,6 @@ namespace storm {
                 return str.str();
             }
             
-            std::string toString(Triangulation const& t) const {
-                std::stringstream str;
-                str << "(\n";
-                for (uint64_t i = 0; i < t.size(); ++i) {
-                    str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n";
-                }
-                str <<")\n";
-                return str.str();
-            }
-            
             bool isEqual(BeliefType const& first, BeliefType const& second) const {
                 if (first.size() != second.size()) {
                     return false;
@@ -186,49 +258,11 @@ namespace storm {
                 return true;
             }
             
-            template <typename SummandsType>
-            ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) {
-                ValueType result = storm::utility::zero<ValueType>();
-                for (auto const& entry : getBelief(beliefId)) {
-                    result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
-                }
-                return result;
-            }
-            
-            
-            BeliefId const& getInitialBelief() const {
-                return initialBeliefId;
-            }
-            
-            ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const {
-                auto const& belief = getBelief(beliefId);
-                STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified.");
-                auto result = storm::utility::zero<ValueType>();
-                auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
-                for (auto const &entry : belief) {
-                    uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex;
-                    STORM_LOG_ASSERT(choiceIndex < choiceIndices[entry.first + 1], "Invalid local action index.");
-                    STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index.");
-                    result += entry.second * pomdpActionRewardVector[choiceIndex];
-                }
-                return result;
-            }
-            
             uint32_t getBeliefObservation(BeliefType belief) {
                 STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief.");
                 return pomdp.getObservation(belief.begin()->first);
             }
             
-            uint32_t getBeliefObservation(BeliefId beliefId) {
-                return getBeliefObservation(getBelief(beliefId));
-            }
-            
-            uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
-                auto belief = getBelief(beliefId);
-                return pomdp.getNumberOfChoices(belief.begin()->first);
-            }
-            
-
             Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
                 //TODO this can also be simplified using the sparse vector interpretation
                 //TODO Enable caching for this method?
@@ -307,22 +341,6 @@ namespace storm {
                 return result;
             }
             
-            Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) {
-                return triangulateBelief(getBelief(beliefId), resolution);
-            }
-            
-            template<typename DistributionType>
-            void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) {
-                auto insertionRes = distr.emplace(state, value);
-                if (!insertionRes.second) {
-                    insertionRes.first->second += value;
-                }
-            }
-            
-            BeliefId getNumberOfBeliefIds() const {
-                return beliefs.size();
-            }
-            
             std::map<BeliefId, ValueType> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
                 std::map<BeliefId, ValueType> destinations;
                 // TODO: Output as vector?
@@ -369,16 +387,6 @@ namespace storm {
                 
             }
             
-            std::map<BeliefId, ValueType> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
-                return expandInternal(beliefId, actionIndex, observationResolutions);
-            }
-            
-            std::map<BeliefId, ValueType> expand(BeliefId const& beliefId, uint64_t actionIndex) {
-                return expandInternal(beliefId, actionIndex);
-            }
-            
-        private:
-            
             BeliefId computeInitialBelief() {
                 STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
                                  "POMDP contains more than one initial state");

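The pattern used throughout this refactoring is a public overload on BeliefId that forwards to a private overload on BeliefType, so the concrete belief representation never leaks out of the manager. A minimal sketch of that pattern (a hypothetical class, not the patch's code); it also makes a later swap of the representation, as patch 34 below does, a purely internal change:

    #include <cstdint>
    #include <map>
    #include <sstream>
    #include <string>
    #include <vector>

    class BeliefStoreSketch {
    public:
        typedef uint64_t BeliefId;
        // The public API talks about ids only...
        std::string toString(BeliefId id) const {
            return toString(beliefs.at(id));
        }
    private:
        // ...while the representation stays private and replaceable.
        typedef std::map<uint64_t, double> BeliefType; // state -> probability
        std::string toString(BeliefType const& belief) const {
            std::stringstream str;
            for (auto const& entry : belief) {
                str << entry.first << ": " << entry.second << " ";
            }
            return str.str();
        }
        std::vector<BeliefType> beliefs; // BeliefId indexes into this vector
    };
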
From eca4dab6c069642731639aea3a0b1dcb0cb8736f Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 7 Apr 2020 13:43:49 +0200
Subject: [PATCH 30/40] BeliefManager: Expanding a belief now returns a vector
 instead of a map

---
 src/storm-pomdp/storage/BeliefManager.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h
index 0731aef35..b74390a79 100644
--- a/src/storm-pomdp/storage/BeliefManager.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@@ -120,11 +120,11 @@ namespace storm {
                 return beliefs.size();
             }
             
-            std::map<BeliefId, ValueType> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
+            std::vector<std::pair<BeliefId, ValueType>> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
                 return expandInternal(beliefId, actionIndex, observationResolutions);
             }
             
-            std::map<BeliefId, ValueType> expand(BeliefId const& beliefId, uint64_t actionIndex) {
+            std::vector<std::pair<BeliefId, ValueType>> expand(BeliefId const& beliefId, uint64_t actionIndex) {
                 return expandInternal(beliefId, actionIndex);
             }
             
@@ -341,8 +341,8 @@ namespace storm {
                 return result;
             }
             
-            std::map<BeliefId, ValueType> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
-                std::map<BeliefId, ValueType> destinations;
+            std::vector<std::pair<BeliefId, ValueType>> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
+                std::vector<std::pair<BeliefId, ValueType>> destinations;
                 // TODO: Output as vector?
                 
                 BeliefType belief = getBelief(beliefId);
@@ -373,13 +373,15 @@ namespace storm {
                     }
                     STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief.");
                     
+                    // Insert the destination. We know that the destinations are pairwise disjoint since they have different observations.
                     if (observationTriangulationResolutions) {
                         Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.get()[successor.first]);
                         for (size_t j = 0; j < triangulation.size(); ++j) {
-                            addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
+                            // Here we additionally assume that triangulation.gridPoints does not contain the same point multiple times
+                            destinations.emplace_back(triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
                         }
                     } else {
-                        addToDistribution(destinations, getOrAddBeliefId(successorBelief), successor.second);
+                        destinations.emplace_back(getOrAddBeliefId(successorBelief), successor.second);
                     }
                 }
     

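The switch from std::map to std::vector<std::pair<...>> is sound only because successor beliefs with different observations can never coincide, so no key occurs twice and no probability mass needs to be merged. A sketch of the contrast, with hypothetical helper names:

    #include <cstdint>
    #include <map>
    #include <utility>
    #include <vector>

    typedef uint64_t BeliefId;

    // Possibly repeated targets: a map is needed to merge probability mass.
    void addMerged(std::map<BeliefId, double>& distr, BeliefId target, double p) {
        distr[target] += p; // operator[] value-initializes missing entries to 0.0
    }

    // Provably disjoint targets: appending to a vector suffices and avoids the
    // per-insert ordered lookup.
    void addDisjoint(std::vector<std::pair<BeliefId, double>>& distr, BeliefId target, double p) {
        distr.emplace_back(target, p);
    }
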
From 937659f3565f51c9b0850a2d3b7452fb86cbc23e Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 7 Apr 2020 15:32:08 +0200
Subject: [PATCH 31/40] First improvement step for Freudenthal triangulation

---
 src/storm-pomdp/storage/BeliefManager.h | 130 +++++++++++++-----------
 1 file changed, 73 insertions(+), 57 deletions(-)

diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h
index b74390a79..7e37b9c16 100644
--- a/src/storm-pomdp/storage/BeliefManager.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@@ -263,77 +263,93 @@ namespace storm {
                 return pomdp.getObservation(belief.begin()->first);
             }
             
+            struct FreudenthalData {
+                FreudenthalData(StateType const& pomdpState, StateType const& dimension, BeliefValueType const& x) : pomdpState(pomdpState), dimension(dimension), value(storm::utility::floor(x)), diff(x-value) { };
+                StateType pomdpState;
+                StateType dimension; // i
+                BeliefValueType value; // v[i] in the Lovejoy paper
+                BeliefValueType diff; // d[i] in the Lovejoy paper
+            };
+            struct FreudenthalDataComparator {
+                bool operator()(FreudenthalData const& first, FreudenthalData const& second) const {
+                    if (first.diff != second.diff) {
+                        return first.diff > second.diff;
+                    } else {
+                        return first.dimension < second.dimension;
+                    }
+                }
+            };
+            
             Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
-                //TODO this can also be simplified using the sparse vector interpretation
                 //TODO Enable caching for this method?
                 STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid.");
                 
-                auto nrStates = pomdp.getNumberOfStates();
+                auto convResolution = storm::utility::convertNumber<BeliefValueType>(resolution);
                 
                 // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
                 // Variable names are based on the paper
-                // TODO avoid reallocations for these vectors
-                std::vector<BeliefValueType> x(nrStates);
-                std::vector<BeliefValueType> v(nrStates);
-                std::vector<BeliefValueType> d(nrStates);
-                auto convResolution = storm::utility::convertNumber<BeliefValueType>(resolution);
-
-                for (size_t i = 0; i < nrStates; ++i) {
-                    for (auto const &probEntry : belief) {
-                        if (probEntry.first >= i) {
-                            x[i] += convResolution * probEntry.second;
-                        }
-                    }
-                    v[i] = storm::utility::floor(x[i]);
-                    d[i] = x[i] - v[i];
+                // However, we speed this up a little by exploiting that belief states usually have sparse support.
+                // TODO: for the sorting, it probably suffices to have a map from diffs to dimensions. The other FreudenthalData could then also be stored in vectors, which would be a bit more like the original algorithm.
+                
+                // Initialize some data
+                std::vector<typename std::set<FreudenthalData, FreudenthalDataComparator>::iterator> dataIterators;
+                dataIterators.reserve(belief.size());
+                // Initialize first row of 'qs' matrix
+                std::vector<BeliefValueType> qsRow;
+                qsRow.reserve(belief.size());
+                std::set<FreudenthalData, FreudenthalDataComparator> freudenthalData;
+                BeliefValueType x = convResolution;
+                for (auto const& entry : belief) {
+                    auto insertionIt = freudenthalData.emplace(entry.first, dataIterators.size(), x).first;
+                    dataIterators.push_back(insertionIt);
+                    qsRow.push_back(dataIterators.back()->value);
+                    x -= entry.second * convResolution;
                 }
-
-                auto p = storm::utility::vector::getSortedIndices(d);
-
-                std::vector<std::vector<BeliefValueType>> qs(nrStates, std::vector<BeliefValueType>(nrStates));
-                for (size_t i = 0; i < nrStates; ++i) {
-                    if (i == 0) {
-                        for (size_t j = 0; j < nrStates; ++j) {
-                            qs[i][j] = v[j];
-                        }
-                    } else {
-                        for (size_t j = 0; j < nrStates; ++j) {
-                            if (j == p[i - 1]) {
-                                qs[i][j] = qs[i - 1][j] + storm::utility::one<BeliefValueType>();
-                            } else {
-                                qs[i][j] = qs[i - 1][j];
-                            }
+                qsRow.push_back(storm::utility::zero<BeliefValueType>());
+                assert(!freudenthalData.empty());
+                
+                Triangulation result;
+                result.weights.reserve(freudenthalData.size());
+                result.gridPoints.reserve(freudenthalData.size());
+                
+                // Insert first grid point
+                // TODO: this special treatment is actually not necessary.
+                BeliefValueType firstWeight = storm::utility::one<ValueType>() - freudenthalData.begin()->diff + freudenthalData.rbegin()->diff;
+                if (!cc.isZero(firstWeight)) {
+                    result.weights.push_back(firstWeight);
+                    BeliefType gridPoint;
+                    for (StateType j = 0; j < dataIterators.size(); ++j) {
+                        BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1];
+                        if (!cc.isZero(gridPointEntry)) {
+                            gridPoint[dataIterators[j]->pomdpState] = gridPointEntry / convResolution;
                         }
                     }
+                    result.gridPoints.push_back(getOrAddBeliefId(gridPoint));
                 }
                 
-                Triangulation result;
-                // The first weight is 1-sum(other weights). We therefore process the js in reverse order
-                BeliefValueType firstWeight = storm::utility::one<BeliefValueType>();
-                for (size_t j = nrStates; j > 0;) {
-                    --j;
-                    // First create the weights. The weights vector will be reversed at the end.
-                    ValueType weight;
-                    if (j == 0) {
-                        weight = firstWeight;
-                    } else {
-                        weight = d[p[j - 1]] - d[p[j]];
-                        firstWeight -= weight;
-                    }
-                    if (!cc.isZero(weight)) {
-                        result.weights.push_back(weight);
-                        BeliefType gridPoint;
-                        auto const& qsj = qs[j];
-                        for (size_t i = 0; i < nrStates - 1; ++i) {
-                            BeliefValueType gridPointEntry = qsj[i] - qsj[i + 1];
-                            if (!cc.isZero(gridPointEntry)) {
-                                gridPoint[i] = gridPointEntry / convResolution;
+                if (freudenthalData.size() > 1) {
+                    // Insert remaining grid points
+                    auto currentSortedEntry = freudenthalData.begin();
+                    auto previousSortedEntry = currentSortedEntry++;
+                    for (StateType i = 1; i < dataIterators.size(); ++i) {
+                        // 'compute' the next row of the qs matrix
+                        qsRow[previousSortedEntry->dimension] += storm::utility::one<BeliefValueType>();
+                        
+                        BeliefValueType weight = previousSortedEntry->diff - currentSortedEntry->diff;
+                        if (!cc.isZero(weight)) {
+                            result.weights.push_back(weight);
+                            
+                            BeliefType gridPoint;
+                            for (StateType j = 0; j < dataIterators.size(); ++j) {
+                                BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1];
+                                if (!cc.isZero(gridPointEntry)) {
+                                    gridPoint[dataIterators[j]->pomdpState] = gridPointEntry / convResolution;
+                                }
                             }
+                            result.gridPoints.push_back(getOrAddBeliefId(gridPoint));
                         }
-                        if (!cc.isZero(qsj[nrStates - 1])) {
-                            gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution;
-                        }
-                        result.gridPoints.push_back(getOrAddBeliefId(gridPoint));
+                        ++previousSortedEntry;
+                        ++currentSortedEntry;
                     }
                 }
                 

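For intuition, here is Lovejoy's triangulation worked out by hand on a tiny instance (an illustrative example, not from the patch), following the construction as implemented above: a belief b = (0.7, 0.3) over two states with resolution N = 2.

    x = (N*(b_0 + b_1), N*b_1)   = (2, 0.6)
    v = floor(x)                 = (2, 0)
    d = x - v                    = (0, 0.6),  sorted descending: d_1 = 0.6, d_0 = 0

    First grid point, from row q = (2, 0) (with a dummy trailing 0):
        weight 1 - d_max + d_min = 1 - 0.6 + 0 = 0.4
        point ((2 - 0)/2, (0 - 0)/2) = (1, 0)
    Second grid point, after incrementing the dimension with the largest diff, q = (2, 1):
        weight 0.6 - 0 = 0.6
        point ((2 - 1)/2, (1 - 0)/2) = (0.5, 0.5)

    Check: 0.4 * (1, 0) + 0.6 * (0.5, 0.5) = (0.7, 0.3) = b, and the weights sum to 1.
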
From 2f020ce6860af6cb45a460aca32e1d52f910df74 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 8 Apr 2020 08:12:32 +0200
Subject: [PATCH 32/40] BeliefManager: Making Freudenthal happy (and fast)

---
 src/storm-pomdp/storage/BeliefManager.h | 122 ++++++++++--------------
 1 file changed, 53 insertions(+), 69 deletions(-)

diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h
index 7e37b9c16..95c596005 100644
--- a/src/storm-pomdp/storage/BeliefManager.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@@ -263,96 +263,80 @@ namespace storm {
                 return pomdp.getObservation(belief.begin()->first);
             }
             
-            struct FreudenthalData {
-                FreudenthalData(StateType const& pomdpState, StateType const& dimension, BeliefValueType const& x) : pomdpState(pomdpState), dimension(dimension), value(storm::utility::floor(x)), diff(x-value) { };
-                StateType pomdpState;
+            struct FreudenthalDiff {
+                FreudenthalDiff(StateType const& dimension, BeliefValueType&& diff) : dimension(dimension), diff(std::move(diff)) { };
                 StateType dimension; // i
-                BeliefValueType value; // v[i] in the Lovejoy paper
-                BeliefValueType diff; // d[i] in the Lovejoy paper
-            };
-            struct FreudenthalDataComparator {
-                bool operator()(FreudenthalData const& first, FreudenthalData const& second) const {
-                    if (first.diff != second.diff) {
-                        return first.diff > second.diff;
+                BeliefValueType diff; // d[i]
+                bool operator>(FreudenthalDiff const& other) const {
+                    if (diff != other.diff) {
+                        return diff > other.diff;
                     } else {
-                        return first.dimension < second.dimension;
+                        return dimension < other.dimension;
                     }
                 }
             };
             
             Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
-                //TODO Enable caching for this method?
                 STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid.");
-                
-                auto convResolution = storm::utility::convertNumber<BeliefValueType>(resolution);
-                
-                // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
-                // Variable names are based on the paper
-                // However, we speed this up a little by exploiting that belief states usually have sparse support.
-                // TODO: for the sorting, it probably suffices to have a map from diffs to dimensions. The other FreudenthalData could then also be stored in vectors, which would be a bit more like the original algorithm.
-                
-                // Initialize some data
-                std::vector<typename std::set<FreudenthalData, FreudenthalDataComparator>::iterator> dataIterators;
-                dataIterators.reserve(belief.size());
-                // Initialize first row of 'qs' matrix
-                std::vector<BeliefValueType> qsRow;
-                qsRow.reserve(belief.size());
-                std::set<FreudenthalData, FreudenthalDataComparator> freudenthalData;
-                BeliefValueType x = convResolution;
-                for (auto const& entry : belief) {
-                    auto insertionIt = freudenthalData.emplace(entry.first, dataIterators.size(), x).first;
-                    dataIterators.push_back(insertionIt);
-                    qsRow.push_back(dataIterators.back()->value);
-                    x -= entry.second * convResolution;
-                }
-                qsRow.push_back(storm::utility::zero<BeliefValueType>());
-                assert(!freudenthalData.empty());
-                
+                StateType numEntries = belief.size();
                 Triangulation result;
-                result.weights.reserve(freudenthalData.size());
-                result.gridPoints.reserve(freudenthalData.size());
-                
-                // Insert first grid point
-                // TODO: this special treatment is actually not necessary.
-                BeliefValueType firstWeight = storm::utility::one<BeliefValueType>() - freudenthalData.begin()->diff + freudenthalData.rbegin()->diff;
-                if (!cc.isZero(firstWeight)) {
-                    result.weights.push_back(firstWeight);
-                    BeliefType gridPoint;
-                    for (StateType j = 0; j < dataIterators.size(); ++j) {
-                        BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1];
-                        if (!cc.isZero(gridPointEntry)) {
-                            gridPoint[dataIterators[j]->pomdpState] = gridPointEntry / convResolution;
-                        }
+
+                // Quickly triangulate Dirac beliefs
+                if (numEntries == 1u) {
+                    result.weights.push_back(storm::utility::one<BeliefValueType>());
+                    result.gridPoints.push_back(getOrAddBeliefId(belief));
+                } else {
+                    
+                    auto convResolution = storm::utility::convertNumber<BeliefValueType>(resolution);
+                    // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
+                    // Variable names are mostly based on the paper
+                    // However, we speed this up a little by exploiting that belief states usually have sparse support (i.e. numEntries is much smaller than pomdp.getNumberOfStates()).
+                    // Initialize diffs and the first row of the 'qs' matrix (aka v)
+                    std::set<FreudenthalDiff, std::greater<FreudenthalDiff>> sorted_diffs; // d (and p?) in the paper
+                    std::vector<BeliefValueType> qsRow; // Row of the 'qs' matrix from the paper (initially corresponds to v
+                    qsRow.reserve(numEntries);
+                    std::vector<StateType> toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices
+                    toOriginalIndicesMap.reserve(numEntries);
+                    BeliefValueType x = convResolution;
+                    for (auto const& entry : belief) {
+                        qsRow.push_back(storm::utility::floor(x)); // v
+                        sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v
+                        toOriginalIndicesMap.push_back(entry.first);
+                        x -= entry.second * convResolution;
                     }
-                    result.gridPoints.push_back(getOrAddBeliefId(gridPoint));
-                }
-                
-                if (freudenthalData.size() > 1) {
-                    // Insert remaining grid points
-                    auto currentSortedEntry = freudenthalData.begin();
-                    auto previousSortedEntry = currentSortedEntry++;
-                    for (StateType i = 1; i < dataIterators.size(); ++i) {
-                        // 'compute' the next row of the qs matrix
-                        qsRow[previousSortedEntry->dimension] += storm::utility::one<BeliefValueType>();
-                        
-                        BeliefValueType weight = previousSortedEntry->diff - currentSortedEntry->diff;
+                    // Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler
+                    qsRow.push_back(storm::utility::zero<BeliefValueType>());
+                    
+                    result.weights.reserve(numEntries);
+                    result.gridPoints.reserve(numEntries);
+                    auto currentSortedDiff = sorted_diffs.begin();
+                    auto previousSortedDiff = sorted_diffs.end();
+                    --previousSortedDiff;
+                    for (StateType i = 0; i < numEntries; ++i) {
+                        // Compute the weight for the grid points
+                        BeliefValueType weight = previousSortedDiff->diff - currentSortedDiff->diff;
+                        if (i == 0) {
+                            // The first weight is a bit different
+                            weight += storm::utility::one<ValueType>();
+                        } else {
+                            // 'compute' the next row of the qs matrix
+                            qsRow[previousSortedDiff->dimension] += storm::utility::one<BeliefValueType>();
+                        }
                         if (!cc.isZero(weight)) {
                             result.weights.push_back(weight);
-                            
+                            // Compute the grid point
                             BeliefType gridPoint;
-                            for (StateType j = 0; j < dataIterators.size(); ++j) {
+                            for (StateType j = 0; j < numEntries; ++j) {
                                 BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1];
                                 if (!cc.isZero(gridPointEntry)) {
-                                    gridPoint[dataIterators[j]->pomdpState] = gridPointEntry / convResolution;
+                                    gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / convResolution;
                                 }
                             }
                             result.gridPoints.push_back(getOrAddBeliefId(gridPoint));
                         }
-                        ++previousSortedEntry;
-                        ++currentSortedEntry;
+                        previousSortedDiff = currentSortedDiff++;
                     }
                 }
-                
                 STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation: " << toString(result));
                 return result;
             }

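The rewritten loop pairs each element of a descending-sorted std::set with its predecessor, starting with a wrap-around pair (smallest, largest) whose weight gets the extra +1; the weights then telescope to exactly 1. A reduced sketch of that iterator pattern with hypothetical values:

    #include <cassert>
    #include <cmath>
    #include <cstddef>
    #include <functional>
    #include <iterator>
    #include <set>

    int main() {
        std::set<double, std::greater<double>> diffs = {0.6, 0.2, 0.1}; // sorted descending
        auto current = diffs.begin();           // largest diff
        auto previous = std::prev(diffs.end()); // smallest diff
        double total = 0.0;
        for (std::size_t i = 0; i < diffs.size(); ++i) {
            double weight = *previous - *current;
            if (i == 0) {
                weight += 1.0; // first weight: 1 - d_max + d_min
            }
            total += weight;
            previous = current++; // same idiom as 'previousSortedDiff = currentSortedDiff++'
        }
        assert(std::fabs(total - 1.0) < 1e-9); // 0.5 + 0.4 + 0.1: the weights telescope to 1
        return 0;
    }
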
From fcee1d05fabe15de82f6213a3bd91acf25492ea1 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 8 Apr 2020 10:09:45 +0200
Subject: [PATCH 33/40] Fixed an issue with dropping unexplored states.

---
 src/storm-pomdp/builder/BeliefMdpExplorer.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index c8f1775ed..fd2b54862 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -323,7 +323,7 @@ namespace storm {
                 storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels));
                 exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
                 status = Status::ModelFinished;
-                STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << "of which were flagged as truncated).");
+                STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << " of which were flagged as truncated).");
 
             }
 
@@ -368,7 +368,8 @@ namespace storm {
                 { // beliefIdToMdpStateMap
                     for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) {
                         if (relevantMdpStates.get(belIdToMdpStateIt->second)) {
-                            // Keep current entry and move on to the next one.
+                            // Translate current entry and move on to the next one.
+                            belIdToMdpStateIt->second = toRelevantStateIndexMap[belIdToMdpStateIt->second];
                             ++belIdToMdpStateIt;
                         } else {
                             STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first), "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId");
@@ -619,7 +620,7 @@ namespace storm {
                             return findRes->second;
                         }
                     }
-                    // At this poind we need to add a new MDP state
+                    // At this point we need to add a new MDP state
                     MdpStateType result = getCurrentNumberOfMdpStates();
                     assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size());
                     mdpStateToBeliefIdMap.push_back(beliefId);
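
The fix relies on toRelevantStateIndexMap translating old MDP state indices into the compacted, renumbered state space. A hedged sketch of how such a translation table is typically built from a keep-mask (a plain std::vector<bool> standing in for Storm's BitVector):

    #include <cstdint>
    #include <limits>
    #include <vector>

    // Sketch: kept states are renumbered consecutively in their old order;
    // dropped states keep an 'invalid' sentinel. Before the fix above, kept
    // map entries still pointed at the old (pre-compaction) indices.
    std::vector<uint64_t> buildIndexTranslation(std::vector<bool> const& keep) {
        std::vector<uint64_t> toNewIndex(keep.size(), std::numeric_limits<uint64_t>::max());
        uint64_t nextNewIndex = 0;
        for (uint64_t oldIndex = 0; oldIndex < keep.size(); ++oldIndex) {
            if (keep[oldIndex]) {
                toNewIndex[oldIndex] = nextNewIndex++;
            }
        }
        return toNewIndex;
    }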

From 26a0544e4ba075bfbc773f75ee5b1489f41ada1f Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 8 Apr 2020 10:10:34 +0200
Subject: [PATCH 34/40] BeliefManager: Use flat_maps for beliefs and a hash_map
 for belief storage.

---
 src/storm-pomdp/storage/BeliefManager.h | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h
index 95c596005..25ec3a3d2 100644
--- a/src/storm-pomdp/storage/BeliefManager.h
+++ b/src/storm-pomdp/storage/BeliefManager.h
@@ -1,9 +1,9 @@
 #pragma once
 
-#include <map>
+#include <unordered_map>
 #include <boost/optional.hpp>
-//#include <boost/container/flat_map.hpp>
-
+#include <boost/container/flat_map.hpp>
+#include "storm/adapters/RationalNumberAdapter.h"
 #include "storm/utility/macros.h"
 #include "storm/exceptions/UnexpectedException.h"
 
@@ -15,8 +15,7 @@ namespace storm {
         public:
             
             typedef typename PomdpType::ValueType ValueType;
-            //typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType
-            typedef std::map<StateType, BeliefValueType> BeliefType;
+            typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType; // iteration must be in key order (required for consistent hash computation)
             typedef uint64_t BeliefId;
             
             BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
@@ -343,7 +342,6 @@ namespace storm {
             
             std::vector<std::pair<BeliefId, ValueType>> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
                 std::vector<std::pair<BeliefId, ValueType>> destinations;
-                // TODO: Output as vector?
                 
                 BeliefType belief = getBelief(beliefId);
                 
@@ -411,11 +409,23 @@ namespace storm {
                 return insertioRes.first->second;
             }
             
+            struct BeliefHash {
+                std::size_t operator()(const BeliefType& belief) const {
+                    std::size_t seed = 0;
+                    // Assumes that beliefs are ordered
+                    for (auto const& entry : belief) {
+                        boost::hash_combine(seed, entry.first);
+                        boost::hash_combine(seed, entry.second);
+                    }
+                    return seed;
+                }
+            };
+            
             PomdpType const& pomdp;
             std::vector<ValueType> pomdpActionRewardVector;
             
             std::vector<BeliefType> beliefs;
-            std::map<BeliefType, BeliefId> beliefToIdMap;
+            std::unordered_map<BeliefType, BeliefId, BeliefHash> beliefToIdMap;
             BeliefId initialBeliefId;
             
             storm::utility::ConstantsComparator<ValueType> cc;
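
The comment on BeliefType is the crux of this patch: BeliefHash folds entries into the seed in iteration order, so the container must iterate in a canonical (key-sorted) order for equal beliefs to hash equally. A self-contained sketch of the same idea, with uint64_t/double standing in for the template parameters:

    #include <cstddef>
    #include <cstdint>
    #include <boost/container/flat_map.hpp>
    #include <boost/functional/hash.hpp>

    using Belief = boost::container::flat_map<uint64_t, double>;

    // flat_map iterates sorted by key, so two equal beliefs always produce the
    // same sequence of hash_combine calls. With an unordered container the
    // result would depend on bucket layout and the hash would be unusable as a
    // key for the belief-to-id lookup.
    std::size_t hashBelief(Belief const& belief) {
        std::size_t seed = 0;
        for (auto const& entry : belief) {
            boost::hash_combine(seed, entry.first);
            boost::hash_combine(seed, entry.second);
        }
        return seed;
    }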

From 03889958dab727f7a50f0f2f703a6a0c4fb055ec Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 8 Apr 2020 11:29:37 +0200
Subject: [PATCH 35/40] Added a switch to control the size of the
 under-approximation via command line.

---
 .../modules/GridApproximationSettings.cpp        | 10 ++++++++++
 .../settings/modules/GridApproximationSettings.h |  3 ++-
 src/storm-pomdp-cli/storm-pomdp.cpp              |  1 +
 .../ApproximatePOMDPModelchecker.cpp             | 16 ++++++++++++++--
 .../modelchecker/ApproximatePOMDPModelchecker.h  |  1 +
 5 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp
index 8006b3851..6b5b17677 100644
--- a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp
+++ b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp
@@ -19,6 +19,7 @@ namespace storm {
             const std::string limitBeliefExplorationOption = "limit-exploration";
             const std::string numericPrecisionOption = "numeric-precision";
             const std::string cacheSimplicesOption = "cache-simplices";
+            const std::string unfoldBeliefMdpOption = "unfold-belief-mdp";
 
             GridApproximationSettings::GridApproximationSettings() : ModuleSettings(moduleName) {
                 
@@ -35,6 +36,7 @@ namespace storm {
                 
                 this->addOption(storm::settings::OptionBuilder(moduleName, cacheSimplicesOption, false,"Enables caching of simplices which requires more memory but can be faster.").build());
                 
+                this->addOption(storm::settings::OptionBuilder(moduleName, unfoldBeliefMdpOption, false,"Sets the (initial) size threshold of the unfolded belief MDP (higher means more precise results, 0 means automatic choice).").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("value","the maximal number of states").setDefaultValueUnsignedInteger(0).build()).build());
             }
 
             bool GridApproximationSettings::isRefineSet() const {
@@ -65,6 +67,14 @@ namespace storm {
                 return this->getOption(cacheSimplicesOption).getHasOptionBeenSet();
             }
             
+            bool GridApproximationSettings::isUnfoldBeliefMdpSizeThresholdSet() const {
+                return this->getOption(unfoldBeliefMdpOption).getHasOptionBeenSet();
+            }
+            
+            uint64_t GridApproximationSettings::getUnfoldBeliefMdpSizeThreshold() const {
+                return this->getOption(unfoldBeliefMdpOption).getArgumentByName("value").getValueAsUnsignedInteger();
+            }
+            
         } // namespace modules
     } // namespace settings
 } // namespace storm
diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h
index a01fdbd77..88e484128 100644
--- a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h
+++ b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h
@@ -27,7 +27,8 @@ namespace storm {
                 bool isNumericPrecisionSetFromDefault() const;
                 double getNumericPrecision() const;
                 bool isCacheSimplicesSet() const;
-                
+                bool isUnfoldBeliefMdpSizeThresholdSet() const;
+                uint64_t getUnfoldBeliefMdpSizeThreshold() const;
                 // The name of the module.
                 static const std::string moduleName;
 
diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp
index 9b3026832..181c5c31f 100644
--- a/src/storm-pomdp-cli/storm-pomdp.cpp
+++ b/src/storm-pomdp-cli/storm-pomdp.cpp
@@ -109,6 +109,7 @@ namespace storm {
                     options.refinementPrecision = storm::utility::convertNumber<ValueType>(gridSettings.getRefinementPrecision());
                     options.numericPrecision = storm::utility::convertNumber<ValueType>(gridSettings.getNumericPrecision());
                     options.cacheSubsimplices = gridSettings.isCacheSimplicesSet();
+                    options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold();
                     if (storm::NumberTraits<ValueType>::IsExact) {
                         if (gridSettings.isNumericPrecisionSetFromDefault()) {
                            STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmetic is used.");
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index eb116fc7b..070279ac4 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -35,6 +35,7 @@ namespace storm {
                 refinementPrecision = storm::utility::convertNumber<ValueType>(1e-4);
                 numericPrecision = storm::NumberTraits<ValueType>::IsExact ? storm::utility::zero<ValueType>() : storm::utility::convertNumber<ValueType>(1e-9);
                 cacheSubsimplices = false;
+                beliefMdpSizeThreshold = 0ull;
             }
             
             template<typename PomdpModelType, typename BeliefValueType>
@@ -180,7 +181,7 @@ namespace storm {
                         approx->getExploredMdp()->printModelInformationToStream(std::cout);
                         ValueType& resultValue = min ? result.lowerBound : result.upperBound;
                         resultValue = approx->getComputedValueAtInitialState();
-                        underApproxSizeThreshold = approx->getExploredMdp()->getNumberOfStates();
+                        underApproxSizeThreshold = std::max(approx->getExploredMdp()->getNumberOfStates(), underApproxSizeThreshold);
                     }
                 }
                 { // Underapproximation (Uses a fresh Belief manager)
@@ -189,6 +190,12 @@ namespace storm {
                         manager->setRewardModel(rewardModelName);
                     }
                     auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
+                    if (options.beliefMdpSizeThreshold) {
+                        underApproxSizeThreshold = options.beliefMdpSizeThreshold.get();
+                    }
+                    if (underApproxSizeThreshold == 0) {
+                        underApproxSizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); // Heuristically select this (only relevant if the over-approximation could not be built)
+                    }
                     buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx);
                     if (approx->hasComputedValues()) {
                         STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n");
@@ -221,7 +228,12 @@ namespace storm {
                 overApproxValue = overApproximation->getComputedValueAtInitialState();
                 
                 // UnderApproximation
-                uint64_t underApproxSizeThreshold = std::max<uint64_t>(overApproximation->getExploredMdp()->getNumberOfStates(), 10);
+                uint64_t underApproxSizeThreshold;
+                if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0ull) {
+                    underApproxSizeThreshold = options.beliefMdpSizeThreshold.get();
+                } else {
+                    underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates();
+                }
                 auto underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
                 buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
                 if (!underApproximation->hasComputedValues()) {
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 1d5521b6a..823eebf60 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -34,6 +34,7 @@ namespace storm {
                     ValueType refinementPrecision; /// Used to decide when the refinement should terminate
                     ValueType numericPrecision; /// Used to decide whether two values are equal
                     bool cacheSubsimplices; /// Enables caching of subsimplices
+                    boost::optional<uint64_t> beliefMdpSizeThreshold; /// Sets the (initial) size of the unfolded belief MDP. 0 means auto selection.
                 };
                 
                 struct Result {
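
Taken together, the two hunks in ApproximatePOMDPModelchecker.cpp implement a simple precedence for the under-approximation's size threshold. A hedged sketch of that selection logic with illustrative names (patch 38 below refines the zero-handling):

    #include <cstdint>
    #include <boost/optional.hpp>

    // Sketch: an explicitly set, positive command line value wins; otherwise
    // the size of the explored over-approximation MDP is reused; if neither is
    // available, fall back to a POMDP-derived heuristic such as
    // numberOfStates * maxNrStatesWithSameObservation.
    uint64_t chooseUnderApproxThreshold(boost::optional<uint64_t> const& cliThreshold,
                                        uint64_t overApproxMdpSize,
                                        uint64_t heuristicFallback) {
        if (cliThreshold && cliThreshold.get() > 0) {
            return cliThreshold.get();
        }
        if (overApproxMdpSize > 0) {
            return overApproxMdpSize;
        }
        return heuristicFallback;
    }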

From f4f9376c966489ce0ba288235c54360883590138 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 8 Apr 2020 11:30:19 +0200
Subject: [PATCH 36/40] Vector: Added a method for element-wise comparison of
 two vectors.

---
 src/storm/utility/vector.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/storm/utility/vector.h b/src/storm/utility/vector.h
index dd562a8e1..af6098038 100644
--- a/src/storm/utility/vector.h
+++ b/src/storm/utility/vector.h
@@ -142,6 +142,12 @@ namespace storm {
                 return true;
             }
             
+            template<typename T, typename Comparator>
+            bool compareElementWise(std::vector<T> const& left, std::vector<T> const& right, Comparator comp = std::less<T>()) {
+                STORM_LOG_ASSERT(left.size() == right.size(), "Expected that vectors for comparison have equal size");
+                return std::equal(left.begin(), left.end(), right.begin(), comp);
+            }
+            
             /*!
              * Selects the elements from a vector at the specified positions and writes them consecutively into another vector.
              * @param vector The vector into which the selected elements are to be written.
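
The new helper delegates to std::equal with a binary predicate, i.e. it returns true iff comp(left[i], right[i]) holds for every index. Note that the default argument cannot drive template deduction, so in practice callers pass the comparator explicitly, as the call sites in the next patch do. A self-contained equivalent plus usage:

    #include <algorithm>
    #include <functional>
    #include <vector>

    // Self-contained equivalent of the helper above (the size check is made an
    // explicit condition instead of an assertion).
    template<typename T, typename Comparator>
    bool compareElementWise(std::vector<T> const& left, std::vector<T> const& right, Comparator comp) {
        return left.size() == right.size() && std::equal(left.begin(), left.end(), right.begin(), comp);
    }

    // Usage: element-wise 'lower <= upper'.
    // bool sound = compareElementWise(lower, upper, std::less_equal<double>());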

From 3c5df045c1138457b5a98f49ec59a245cb0f7994 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 8 Apr 2020 12:18:32 +0200
Subject: [PATCH 37/40] Added a few assertions

---
 src/storm-pomdp/builder/BeliefMdpExplorer.h                     | 2 ++
 .../modelchecker/TrivialPomdpValueBoundsModelChecker.h          | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index fd2b54862..e5d233aec 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -473,6 +473,8 @@ namespace storm {
                 std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(exploredMdp, task));
                 if (res) {
                     values = std::move(res->asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
+                    STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(lowerValueBounds, values, std::less_equal<ValueType>()), "Computed values are smaller than the lower bound.");
+                    STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(upperValueBounds, values, std::greater_equal<ValueType>()), "Computed values are larger than the upper bound.");
                 } else {
                     STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!");
                     STORM_LOG_ERROR("No result obtained while checking.");
diff --git a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h
index 862a82a05..ca4c2192f 100644
--- a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h
+++ b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h
@@ -88,6 +88,7 @@ namespace storm {
                             pomdpScheduler.setChoice(choiceDistribution, state);
                         }
                     }
+                    STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler.");
                     auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false);
                     
                     auto resultPtr2 = storm::api::verifyWithSparseEngine<ValueType>(scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
@@ -104,6 +105,7 @@ namespace storm {
                         result.lower = std::move(pomdpSchedulerResult);
                         result.upper = std::move(fullyObservableResult);
                     }
+                    STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower, result.upper, std::less_equal<ValueType>()), "Lower bound is larger than upper bound");
                     return result;
                 }
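
Both new warnings express the same sanity condition: every computed value must lie in the [lower, upper] sandwich. Spelled out as plain C++, reusing the compareElementWise sketch after patch 36 (the real code wraps this in STORM_LOG_WARN_COND_DEBUG so it only runs in debug builds):

    #include <functional>
    #include <vector>

    // Sketch: true iff lower[i] <= values[i] <= upper[i] for all i.
    bool valuesWithinBounds(std::vector<double> const& lower,
                            std::vector<double> const& values,
                            std::vector<double> const& upper) {
        return compareElementWise(lower, values, std::less_equal<double>())
            && compareElementWise(upper, values, std::greater_equal<double>());
    }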
     

From 26764137f5cbc3a22181ab4e243d265f1943d03d Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 8 Apr 2020 12:30:39 +0200
Subject: [PATCH 38/40] Fix for the --unfold-belief-mdp setting

---
 src/storm-pomdp-cli/storm-pomdp.cpp                           | 4 +++-
 src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp
index 181c5c31f..19e139d1d 100644
--- a/src/storm-pomdp-cli/storm-pomdp.cpp
+++ b/src/storm-pomdp-cli/storm-pomdp.cpp
@@ -109,7 +109,9 @@ namespace storm {
                     options.refinementPrecision = storm::utility::convertNumber<ValueType>(gridSettings.getRefinementPrecision());
                     options.numericPrecision = storm::utility::convertNumber<ValueType>(gridSettings.getNumericPrecision());
                     options.cacheSubsimplices = gridSettings.isCacheSimplicesSet();
-                    options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold();
+                    if (gridSettings.isUnfoldBeliefMdpSizeThresholdSet()) {
+                        options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold();
+                    }
                     if (storm::NumberTraits<ValueType>::IsExact) {
                         if (gridSettings.isNumericPrecisionSetFromDefault()) {
                            STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmetic is used.");
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 070279ac4..622e63512 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -35,7 +35,7 @@ namespace storm {
                 refinementPrecision = storm::utility::convertNumber<ValueType>(1e-4);
                 numericPrecision = storm::NumberTraits<ValueType>::IsExact ? storm::utility::zero<ValueType>() : storm::utility::convertNumber<ValueType>(1e-9);
                 cacheSubsimplices = false;
-                beliefMdpSizeThreshold = 0ull;
+                beliefMdpSizeThreshold = boost::none;
             }
             
             template<typename PomdpModelType, typename BeliefValueType>
@@ -190,7 +190,7 @@ namespace storm {
                         manager->setRewardModel(rewardModelName);
                     }
                     auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
-                    if (options.beliefMdpSizeThreshold) {
+                    if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0) {
                         underApproxSizeThreshold = options.beliefMdpSizeThreshold.get();
                     }
                     if (underApproxSizeThreshold == 0) {
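
The fix turns the threshold into a genuine tri-state: boost::none means the option was never given, an explicit 0 requests the automatic choice, and a positive value is taken literally. A small hedged sketch of those semantics (illustrative function, not part of Storm):

    #include <cstdint>
    #include <iostream>
    #include <boost/optional.hpp>

    void describeThreshold(boost::optional<uint64_t> const& threshold) {
        if (!threshold) {
            std::cout << "option not given: keep the internal default\n";
        } else if (threshold.get() == 0) {
            std::cout << "explicit 0: choose the threshold automatically\n";
        } else {
            std::cout << "use the user-provided threshold " << threshold.get() << "\n";
        }
    }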

From 7ffe322e06e46595cb337a47d5b8b6dd2a4c9434 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Wed, 8 Apr 2020 12:31:45 +0200
Subject: [PATCH 39/40] SparseModelMemoryProduct: Fixed incorrect computation
 of state-action rewards under a randomized policy.

---
 src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp b/src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp
index 3907f0625..1f30f23fb 100644
--- a/src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp
+++ b/src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp
@@ -410,7 +410,7 @@ namespace storm {
                                 if (isStateReachable(modelState, memoryState)) {
                                     if (scheduler && scheduler->getChoice(modelState, memoryState).isDefined()) {
                                         ValueType factor = scheduler->getChoice(modelState, memoryState).getChoiceAsDistribution().getProbability(rowOffset);
-                                        stateActionRewards.get()[resultTransitionMatrix.getRowGroupIndices()[getResultState(modelState, memoryState)]] = factor * modelStateActionReward;
+                                        stateActionRewards.get()[resultTransitionMatrix.getRowGroupIndices()[getResultState(modelState, memoryState)]] += factor * modelStateActionReward;
                                     } else {
                                         stateActionRewards.get()[resultTransitionMatrix.getRowGroupIndices()[getResultState(modelState, memoryState)] + rowOffset] = modelStateActionReward;
                                     }
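
The one-character fix matters because under a randomized choice all original actions are folded into a single row of the product, so their reward contributions must accumulate rather than overwrite each other: the expected reward is the probability-weighted sum over actions. A minimal numeric sketch of that accumulation, detached from SparseModelMemoryProduct:

    #include <cstddef>
    #include <vector>

    // Expected state-action reward under a distribution over actions:
    // sum_a P(a) * r(s, a). Using '=' instead of '+=' keeps only the last
    // action's contribution, which was the bug fixed above.
    double expectedActionReward(std::vector<double> const& choiceProbabilities,
                                std::vector<double> const& actionRewards) {
        double result = 0.0;
        for (std::size_t action = 0; action < actionRewards.size(); ++action) {
            result += choiceProbabilities[action] * actionRewards[action];
        }
        return result;
    }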

From 5f2a598f48e26bc7c757eaeafd2266abb0cf51f0 Mon Sep 17 00:00:00 2001
From: Sebastian Junges <sebastian.junges@rwth-aachen.de>
Date: Tue, 14 Apr 2020 14:29:30 -0700
Subject: [PATCH 40/40] Remove unsound prob. 1 state computation

---
 .../analysis/QualitativeAnalysis.cpp          | 76 +------------------
 1 file changed, 2 insertions(+), 74 deletions(-)

diff --git a/src/storm-pomdp/analysis/QualitativeAnalysis.cpp b/src/storm-pomdp/analysis/QualitativeAnalysis.cpp
index fc27620af..e288ed4ef 100644
--- a/src/storm-pomdp/analysis/QualitativeAnalysis.cpp
+++ b/src/storm-pomdp/analysis/QualitativeAnalysis.cpp
@@ -69,81 +69,9 @@ namespace storm {
         storm::storage::BitVector QualitativeAnalysis<ValueType>::analyseProb1Max(storm::logic::UntilFormula const& formula) const {
             // We consider the states that satisfy the formula with prob.1 under arbitrary schedulers as goal states.
             storm::storage::BitVector goalStates = storm::utility::graph::performProb1A(pomdp.getTransitionMatrix(), pomdp.getTransitionMatrix().getRowGroupIndices(), pomdp.getBackwardTransitions(), checkPropositionalFormula(formula.getLeftSubformula()), checkPropositionalFormula(formula.getRightSubformula()));
-            
+            STORM_LOG_TRACE("Prob1A states according to MDP: " << goalStates);
             // Now find a set of observations such that there is a memoryless scheduler inducing prob. 1 for each state whose observation is in the set.
-            storm::storage::BitVector candidateStates = goalStates | checkPropositionalFormula(formula.getLeftSubformula());
-            storm::storage::BitVector candidateActions = pomdp.getTransitionMatrix().getRowFilter(candidateStates);
-            storm::storage::BitVector candidateObservations(pomdp.getNrObservations(), true);
-            
-            
-            bool converged = false;
-            while (!converged) {
-                converged = true;
-                
-                // Get the candidate states that can reach the goal with prob1 via candidate actions
-                storm::storage::BitVector newCandidates;
-                if (candidateActions.full()) {
-                    newCandidates = storm::utility::graph::performProb1E(pomdp.getTransitionMatrix(), pomdp.getTransitionMatrix().getRowGroupIndices(), pomdp.getBackwardTransitions(), candidateStates, goalStates);
-                } else {
-                    storm::storage::SparseMatrix<ValueType> filteredTransitions(pomdp.getTransitionMatrix().filterEntries(candidateActions));
-                    newCandidates = storm::utility::graph::performProb1E(filteredTransitions, filteredTransitions.getRowGroupIndices(), filteredTransitions.transpose(true), candidateStates, goalStates);
-                }
-                if (candidateStates != newCandidates) {
-                    converged = false;
-                    candidateStates = std::move(newCandidates);
-                }
-                
-                // Unselect all observations that have a non-candidate state
-                for (uint64_t state = candidateStates.getNextUnsetIndex(0); state < candidateStates.size(); state = candidateStates.getNextUnsetIndex(state + 1)) {
-                    candidateObservations.set(pomdp.getObservation(state), false);
-                }
-                
-                // update the candidate actions to the set of actions that stay inside the candidate state set
-                std::vector<storm::storage::BitVector> candidateActionsPerObservation(pomdp.getNrObservations());
-                for (auto const& state : candidateStates) {
-                    auto& candidateActionsAtState = candidateActionsPerObservation[pomdp.getObservation(state)];
-                    if (candidateActionsAtState.size() == 0) {
-                        candidateActionsAtState.resize(pomdp.getNumberOfChoices(state), true);
-                    }
-                    STORM_LOG_ASSERT(candidateActionsAtState.size() == pomdp.getNumberOfChoices(state), "State " + std::to_string(state) + " has " + std::to_string(pomdp.getNumberOfChoices(state)) + " actions, different from other with same observation (" + std::to_string(candidateActionsAtState.size()) + ")." );
-                    for (auto const& action : candidateActionsAtState) {
-                        for (auto const& entry : pomdp.getTransitionMatrix().getRow(state, action)) {
-                            if (!candidateStates.get(entry.getColumn())) {
-                                candidateActionsAtState.set(action, false);
-                                break;
-                            }
-                        }
-                    }
-                }
-                
-                // Unselect all observations without such an action
-                for (auto const& o : candidateObservations) {
-                    if (candidateActionsPerObservation[o].empty()) {
-                        candidateObservations.set(o, false);
-                    }
-                }
-                
-                // only keep the candidate states with a candidateObservation
-                for (auto const& state : candidateStates) {
-                    if (!candidateObservations.get(pomdp.getObservation(state)) && !goalStates.get(state)) {
-                        candidateStates.set(state, false);
-                        converged = false;
-                    }
-                }
-                
-                // Only keep the candidate actions originating from a candidateState. Also transform the representation of candidate actions
-                candidateActions.clear();
-                for (auto const& state : candidateStates) {
-                    uint64_t offset = pomdp.getTransitionMatrix().getRowGroupIndices()[state];
-                    for (auto const& action : candidateActionsPerObservation[pomdp.getObservation(state)]) {
-                        candidateActions.set(offset + action);
-                    }
-                }
-            }
-            
-            assert(goalStates.isSubsetOf(candidateStates));
-            
-            return candidateStates;
+            return goalStates;
             
         }