@@ -15,13 +15,8 @@
#include "storm/models/sparse/StandardRewardModel.h"
#include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h"
#include "storm/utility/vector.h"
#include "storm/modelchecker/results/CheckResult.h"
#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp"
#include "storm/api/properties.h"
#include "storm/api/export.h"
#include "storm-parsers/api/storm-parsers.h"
#include "storm-pomdp/builder/BeliefMdpExplorer.h"
#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"
@@ -32,8 +27,8 @@
namespace storm {
namespace pomdp {
namespace modelchecker {

template<typename ValueType, typename RewardModelType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::Options::Options() {
template<typename PomdpModelType, typename BeliefValueType>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Options::Options() {
    initialGridResolution = 10;
    explorationThreshold = storm::utility::zero<ValueType>();
    doRefinement = true;
@@ -41,52 +36,69 @@ namespace storm {
    numericPrecision = storm::NumberTraits<ValueType>::IsExact ? storm::utility::zero<ValueType>() : storm::utility::convertNumber<ValueType>(1e-9);
    cacheSubsimplices = false;
}
template<typename ValueType, typename RewardModelType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) {
template<typename PomdpModelType, typename BeliefValueType>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) {
    // Intentionally left empty
}
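// Returns the gap between the computed upper and lower bound, either absolute or relative to the upper bound.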
template<typename PomdpModelType, typename BeliefValueType>
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::diff(bool relative) const {
    ValueType diff = upperBound - lowerBound;
    if (diff < storm::utility::zero<ValueType>()) {
        STORM_LOG_WARN_COND(diff >= 1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << ".");
        diff = storm::utility::zero<ValueType>();
    }
    if (relative && !storm::utility::isZero(upperBound)) {
        diff /= upperBound;
    }
    return diff;
}

template<typename PomdpModelType, typename BeliefValueType>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) {
    // intentionally left empty;
}

template<typename ValueType, typename RewardModelType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp, Options options) : pomdp(pomdp), options(options) {
template<typename PomdpModelType, typename BeliefValueType>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) {
    cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(this->options.numericPrecision), false);
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::check(storm::logic::Formula const& formula) {
template<typename PomdpModelType, typename BeliefValueType>
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::check(storm::logic::Formula const& formula) {
    // Reset all collected statistics
    statistics = Statistics();
    std::unique_ptr<POMDPCheckResult<ValueType>> result;
    // Extract the relevant information from the formula
    auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula);
    // Compute some initial bounds on the values for each state of the pomdp
    auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
    Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]);
    if (formulaInfo.isNonNestedReachabilityProbability()) {
    boost::optional<std::string> rewardModelName;
    if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) {
        // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
        STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
        if (formulaInfo.isNonNestedReachabilityProbability()) {
            if (!formulaInfo.getSinkStates().empty()) {
                auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
                reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
                STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported");
            }
            if (options.doRefinement) {
                result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
            } else {
                result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
            // Expected reward formula!
            rewardModelName = formulaInfo.getRewardModelName();
        }
    } else if (formulaInfo.isNonNestedExpectedRewardFormula()) {
        // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
        STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
        if (options.doRefinement) {
            result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true);
        } else {
            // FIXME: pick the non-unique reward model here
            STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet.");
            result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
        STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
    }
    if (options.doRefinement) {
        refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
    } else {
        STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
        computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
    }
    if (storm::utility::resources::isTerminate()) {
        statistics.aborted = true;
@@ -94,8 +106,8 @@ namespace storm {
    return result;
}
template<typename ValueType, typename RewardModelType>
void ApproximatePOMDPModelchecker<ValueType, RewardModelType>::printStatisticsToStream(std::ostream& stream) const {
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::printStatisticsToStream(std::ostream& stream) const {
    stream << "##### Grid Approximation Statistics ######" << std::endl;
    stream << "# Input model: " << std::endl;
    pomdp.printModelInformationToStream(stream);
@@ -143,114 +155,82 @@ namespace storm {
    stream << "##########################################" << std::endl;
}
std::shared_ptr<storm::logic::Formula const> createStandardProperty(bool min, bool computeRewards) {
    std::string propertyString = computeRewards ? "R" : "P";
    propertyString += min ? "min" : "max";
    propertyString += "=? [F \"target\"]";
    std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
    return storm::api::extractFormulasFromProperties(propertyVector).front();
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const& targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
    if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
        STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
    }
template<typename ValueType>
storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property, std::vector<ValueType>&& hintVector) {
    // Note: The property should not run out of scope after calling this because the task only stores the property by reference.
    // Therefore, this method needs the property by reference (and not const reference)
    auto task = storm::api::createTask<ValueType>(property, false);
    if (!hintVector.empty()) {
        auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
        hint.setResultHint(std::move(hintVector));
        auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
        task.setHint(hintPtr);
    }
    return task;
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::refineReachability(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards) {
    std::srand(time(NULL));
    // Compute easy upper and lower bounds
    storm::utility::Stopwatch underlyingWatch(true);
    // Compute the results on the underlying MDP as a basic overapproximation
    storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling());
    // TODO: Is the following really necessary
    underlyingMdpLabeling.addLabel("__goal__");
    std::vector<uint64_t> goalStates;
    for (auto const& targetObs : targetObservations) {
        for (auto const& goalState : pomdp.getStatesWithObservation(targetObs)) {
            underlyingMdpLabeling.addLabelToState("__goal__", goalState);
        }
    }
    storm::models::sparse::Mdp<ValueType, RewardModelType> underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels());
    auto underlyingModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(
            std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underlyingMdp));
    std::string initPropString = computeRewards ? "R" : "P";
    initPropString += min ? "min" : "max";
    initPropString += "=? [F \"__goal__\"]";
    std::vector<storm::jani::Property> propVector = storm::api::parseProperties(initPropString);
    std::shared_ptr<storm::logic::Formula const> underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front();
    STORM_PRINT("Underlying MDP" << std::endl)
    if (computeRewards) {
        underlyingMdp.addRewardModel("std", pomdp.getUniqueRewardModel());
    }
    underlyingMdp.printModelInformationToStream(std::cout);
    std::unique_ptr<storm::modelchecker::CheckResult> underlyingRes(
            storm::api::verifyWithSparseEngine<ValueType>(underlyingModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
    STORM_LOG_ASSERT(underlyingRes, "Result does not exist.");
    underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true)));
    auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
    underlyingWatch.stop();
    storm::utility::Stopwatch positionalWatch(true);
    // we define some positional scheduler for the POMDP as a basic lower bound
    storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
    for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) {
        auto obsStates = pomdp.getStatesWithObservation(obs);
        // select a random action for all states with the same observation
        uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front());
        for (auto const& state : obsStates) {
            pomdpScheduler.setChoice(chosenAction, state);
        }
    }
    auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false);
    if (computeRewards) {
        underApproxModel->restrictRewardModels({"std"});
    }
    STORM_PRINT("Random Positional Scheduler" << std::endl)
    underApproxModel->printModelInformationToStream(std::cout);
    std::unique_ptr<storm::modelchecker::CheckResult> underapproxRes(
            storm::api::verifyWithSparseEngine<ValueType>(underApproxModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
    STORM_LOG_ASSERT(underapproxRes, "Result does not exist.");
    underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true)));
    auto initialUnderApproxMap = underapproxRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
    positionalWatch.stop();
    STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // "
                                           << initialUnderApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl)
    STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl)
    // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution.
    // This can probably be improved (i.e. resolutions for single belief states)
    STORM_PRINT("Initial Resolution: " << options.initialGridResolution << std::endl)
    uint64_t underApproxSizeThreshold = 0;
    { // Overapproximation
        std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
        std::set<uint32_t> changedObservations;
        uint64_t underApproxModelSize = 200;
        uint64_t refinementCounter = 1;
        STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl)
        std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
                                                                                          {},
                                                                                          {}, underApproxModelSize);
        if (res == nullptr) {
            statistics.refinementSteps = 0;
            return nullptr;
        auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
        if (rewardModelName) {
            manager->setRewardModel(rewardModelName);
        }
        auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager);
        if (approx) {
            STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n");
            approx->getExploredMdp()->printModelInformationToStream(std::cout);
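            // The belief-grid MDP over-approximates the POMDP value, so its result is an upper bound for maximizing properties and a lower bound for minimizing ones.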
            ValueType& resultValue = min ? result.lowerBound : result.upperBound;
            resultValue = approx->getComputedValueAtInitialState();
            underApproxSizeThreshold = approx->getExploredMdp()->getNumberOfStates();
        }
    }
    { // Underapproximation (Uses a fresh Belief manager)
        auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
        if (rewardModelName) {
            manager->setRewardModel(rewardModelName);
        }
        auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager);
        if (approx) {
            STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n");
            approx->getExploredMdp()->printModelInformationToStream(std::cout);
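            // The under-approximation value is achievable by some POMDP policy, so it bounds the value from the other side: upper bound when minimizing, lower bound when maximizing.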
            ValueType& resultValue = min ? result.upperBound : result.lowerBound;
            resultValue = approx->getComputedValueAtInitialState();
        }
    }
}

ValueType lastMinScore = storm::utility::infinity<ValueType>();
while (refinementCounter < 1000 && ((!min && res->overApproxValue - res->underApproxValue > options.refinementPrecision) ||
                                    (min && res->underApproxValue - res->overApproxValue > options.refinementPrecision))) {

template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const& targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
    // Set up exploration data
    std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
    auto beliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
    if (rewardModelName) {
        beliefManager->setRewardModel(rewardModelName);
    }
    // OverApproximation
    auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager);
    if (!overApproximation) {
        return;
    }
    ValueType& overApproxValue = min ? result.lowerBound : result.upperBound;
    overApproxValue = overApproximation->getComputedValueAtInitialState();
    // UnderApproximation (TODO: use same belief manager?)
    uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates();
    auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager);
    if (!underApproximation) {
        return;
    }
    ValueType& underApproxValue = min ? result.upperBound : result.lowerBound;
    underApproxValue = underApproximation->getComputedValueAtInitialState();
    // ValueType lastMinScore = storm::utility::infinity<ValueType>();
    // Start refinement
    statistics.refinementSteps = 0;
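    // Refine until the gap between the bounds is below the requested precision (or we are told to terminate).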
    while (result.diff() > options.refinementPrecision) {
        if (storm::utility::resources::isTerminate()) {
            break;
        }
        // TODO the actual refinement
        /*
        // choose which observation(s) to refine
        std::vector<ValueType> obsAccumulator(pomdp.getNrObservations(), storm::utility::zero<ValueType>());
        std::vector<uint64_t> beliefCount(pomdp.getNrObservations(), 0);
@@ -286,9 +266,9 @@ namespace storm {
        }
        /* for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
            obsAccumulator[i] /= storm::utility::convertNumber<ValueType>(beliefCount[i]);
        } */
        // for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
        //    obsAccumulator[i] /= storm::utility::convertNumber<ValueType>(beliefCount[i]);
        //}
        changedObservations.clear();
        //TODO think about some other scoring methods
@@ -301,21 +281,21 @@ namespace storm {
                observationResolutionVector[i] = maxRes + 1;
                changedObservations.insert(i);
            }
        /* } else {
            lastMinScore = std::min(maxAvgDifference, lastMinScore);
            STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
            STORM_PRINT("Last Min Score: " << lastMinScore << std::endl)
            //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl)
            for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
        // } else {
        //     lastMinScore = std::min(maxAvgDifference, lastMinScore);
        //     STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
        //     STORM_PRINT("Last Min Score: " << lastMinScore << std::endl)
        //     //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl)
        //     for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
                //STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i])
                if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) {
        //         if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) {
                    //STORM_PRINT(" *** ")
                    observationResolutionVector[i] += 1;
                    changedObservations.insert(i);
                }
        //             observationResolutionVector[i] += 1;
        //             changedObservations.insert(i);
        //         }
                //STORM_PRINT(std::endl)
            }
        } */
        //     }
        //}
        if (underApproxModelSize < std::numeric_limits<uint64_t>::max() - 101) {
            underApproxModelSize += 100;
        }
@@ -327,60 +307,13 @@ namespace storm {
        STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) ||
                             cc.isEqual(res->underApproxValue, res->overApproxValue),
                             "The value for the under-approximation is larger than the value for the over-approximation.");
        ++refinementCounter;
    }
    statistics.refinementSteps = refinementCounter;
    if (min) {
        return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{res->underApproxValue, res->overApproxValue});
    } else {
        return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{res->overApproxValue, res->underApproxValue});
    }
}

template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(std::set<uint32_t> const& targetObservations, bool min,
                                                                                 bool computeRewards,
                                                                                 std::vector<ValueType> const& lowerPomdpValueBounds,
                                                                                 std::vector<ValueType> const& upperPomdpValueBounds,
                                                                                 uint64_t maxUaModelSize) {
    STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
    std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
    auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, lowerPomdpValueBounds,
                                             upperPomdpValueBounds, maxUaModelSize);
    if (result == nullptr) {
        return nullptr;
    }
    if (min) {
        return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->underApproxValue, result->overApproxValue});
    } else {
        return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->overApproxValue, result->underApproxValue});
        */
        ++statistics.refinementSteps.get();
    }
}
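// Computes the expectation of 'summands' under the given belief, i.e. the sum of summands[state] weighted by the belief probability of 'state'.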
template<typename ValueType, typename BeliefType, typename SummandsType>
ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
    ValueType result = storm::utility::zero<ValueType>();
    for (auto const& entry : belief) {
        result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
    }
    return result;
}

template<typename ValueType, typename RewardModelType>
std::shared_ptr<RefinementComponents<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const& targetObservations, bool min,
                                                                                     std::vector<uint64_t>& observationResolutionVector,
                                                                                     bool computeRewards,
                                                                                     std::vector<ValueType> const& lowerPomdpValueBounds,
                                                                                     std::vector<ValueType> const& upperPomdpValueBounds,
                                                                                     uint64_t maxUaModelSize) {
    auto beliefManager = std::make_shared<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>>(pomdp, options.numericPrecision);
    if (computeRewards) {
        beliefManager->setRewardModel(); // TODO: get actual name
    }

template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeOverApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager) {
    statistics.overApproximationBuildTime.start();
    storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
    if (computeRewards) {
@@ -390,9 +323,6 @@ namespace storm {
    }
    // Expand the beliefs to generate the grid on-the-fly
    if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
        STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
    }
    while (explorer.hasUnexploredState()) {
        uint64_t currId = explorer.exploreNextState();
@@ -445,39 +375,20 @@ namespace storm {
    explorer.finishExploration();
    statistics.overApproximationBuildTime.stop();
    STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
    explorer.getExploredMdp()->printModelInformationToStream(std::cout);
    statistics.overApproximationCheckTime.start();
    explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
    statistics.overApproximationCheckTime.stop();
    STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl);
    STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
    //auto underApprox = weightedSumUnderMap[initialBelief.id];
    auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize, lowerPomdpValueBounds, upperPomdpValueBounds);
    if (storm::utility::resources::isTerminate() && !underApproxComponents) {
        // TODO: return other components needed for refinement.
        //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
        //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
    }
    STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
    /* TODO: return other components needed for refinement.
    return std::make_unique<RefinementComponents<ValueType>>(
            RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
                                            underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
                                            underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
    */
    return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {},
                                                                                             underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
    return std::make_shared<ExplorerType>(std::move(explorer));
}
template<typename ValueType, typename RewardModelType>
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineOverApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
    /*TODO:
    template<typename PomdpModelType, typename BeliefValueType>
    std::shared_ptr<RefinementComponents<ValueType>>
    ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeRefinementStep(std::set<uint32_t> const& targetObservations, bool min,
    ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeRefinementStep(std::set<uint32_t> const& targetObservations, bool min,
                                                                                         std::vector<uint64_t>& observationResolutionVector,
                                                                                         bool computeRewards,
                                                                                         std::shared_ptr<RefinementComponents<ValueType>> refinementComponents,
@@ -504,7 +415,7 @@ namespace storm {
        uint64_t nextBeliefId = refinementComponents->beliefList.size();
        uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates();
        std::set<uint64_t> relevantStates;
        std::set<uint64_t> relevantStates; // The MDP states where the observation has changed
        for (auto const& iter : refinementComponents->overApproxBeliefStateMap.left) {
            auto currentBelief = refinementComponents->beliefList[iter.first];
            if (changedObservations.find(currentBelief.observation) != changedObservations.end()) {
@@ -512,7 +423,7 @@ namespace storm {
            }
        }
        std::set<std::pair<uint64_t, uint64_t>> statesAndActionsToCheck;
        std::set<std::pair<uint64_t, uint64_t>> statesAndActionsToCheck; // The predecessors of states where the observation has changed
        for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) {
            for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) {
                for (typename storm::storage::SparseMatrix<ValueType>::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(
@@ -536,6 +447,7 @@ namespace storm {
                                action);
                std::map<uint64_t, ValueType> transitionInActionBelief;
                for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
                    // Expand and triangulate the successor
                    uint32_t observation = iter->first;
                    uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget,
                                                                               targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId);
@@ -803,159 +715,11 @@ namespace storm {
                                                        refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap,
                                                        underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId});
    }
    template<typename ValueType, typename RewardModelType>
    std::unique_ptr<POMDPCheckResult<ValueType>>
    ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(std::set<uint32_t> const& targetObservations, bool min) {
        std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
        // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
    }

    template<typename ValueType, typename RewardModelType>
    std::unique_ptr<POMDPCheckResult<ValueType>>
    ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbabilityOTF(std::set<uint32_t> const& targetObservations, bool min) {
        std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
        // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
    }

    template<typename ValueType, typename RewardModelType>
    std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
    ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::vector<storm::pomdp::Belief<ValueType>>& beliefList,
                                                                                        std::vector<bool>& beliefIsTarget,
                                                                                        std::set<uint32_t> const& targetObservations,
                                                                                        uint64_t initialBeliefId, bool min,
                                                                                        bool computeRewards, uint64_t maxModelSize) {
        std::set<uint64_t> visitedBelieves;
        std::deque<uint64_t> beliefsToBeExpanded;
        bsmap_type beliefStateMap;
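        // Two dedicated MDP states are created up front: state 0 is a non-target sink and state 1 is the goal state, both with a self-loop.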
        std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions = {{{{0, storm::utility::one<ValueType>()}}},
                                                                               {{{1, storm::utility::one<ValueType>()}}}};
        std::vector<uint64_t> targetStates = {1};
        uint64_t stateId = 2;
        beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, stateId));
        ++stateId;
        uint64_t nextId = beliefList.size();
        uint64_t counter = 0;
        statistics.underApproximationBuildTime.start();
        // Expand the beliefs
        visitedBelieves.insert(initialBeliefId);
        beliefsToBeExpanded.push_back(initialBeliefId);
        while (!beliefsToBeExpanded.empty()) {
            //TODO think of other ways to stop exploration besides model size
            auto currentBeliefId = beliefsToBeExpanded.front();
            uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front());
            // for targets, we only consider one action with one transition
            if (beliefIsTarget[currentBeliefId]) {
                // add a self-loop to target states
                targetStates.push_back(beliefStateMap.left.at(currentBeliefId));
                transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one<ValueType>()}}});
            } else if (counter > maxModelSize) {
                transitions.push_back({{{0, storm::utility::one<ValueType>()}}});
            } else {
                // Iterate over all actions and add the corresponding transitions
                std::vector<std::map<uint64_t, ValueType>> actionTransitionStorage;
                //TODO add a way to extract the actions from the over-approx and use them here?
                for (uint64_t action = 0; action < numChoices; ++action) {
                    std::map<uint64_t, ValueType> transitionsInStateWithAction;
                    std::map<uint32_t, ValueType> observationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currentBeliefId], action);
                    for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) {
                        uint32_t observation = iter->first;
                        uint64_t nextBeliefId = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId],
                                                                                   action,
                                                                                   observation, nextId);
                        nextId = beliefList.size();
                        if (visitedBelieves.insert(nextBeliefId).second) {
                            beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId));
                            ++stateId;
                            beliefsToBeExpanded.push_back(nextBeliefId);
                            ++counter;
                        }
                        transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second;
                    }
                    actionTransitionStorage.push_back(transitionsInStateWithAction);
                }
                transitions.push_back(actionTransitionStorage);
            }
            beliefsToBeExpanded.pop_front();
            if (storm::utility::resources::isTerminate()) {
                statistics.underApproximationBuildAborted = true;
                break;
            }
        }
        statistics.underApproximationStates = transitions.size();
        if (storm::utility::resources::isTerminate()) {
            statistics.underApproximationBuildTime.stop();
            return nullptr;
        }
        storm::models::sparse::StateLabeling labeling(transitions.size());
        labeling.addLabel("init");
        labeling.addLabel("target");
        labeling.addLabelToState("init", 0);
        for (auto targetState : targetStates) {
            labeling.addLabelToState("target", targetState);
        }
        std::shared_ptr<storm::models::sparse::Model<ValueType, RewardModelType>> model;
        auto transitionMatrix = buildTransitionMatrix(transitions);
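        // If every state has exactly one choice, the row grouping carries no information and can be dropped.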
        if (transitionMatrix.getRowCount() == transitionMatrix.getRowGroupCount()) {
            transitionMatrix.makeRowGroupingTrivial();
        }
        storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(transitionMatrix, labeling);
        storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(modelComponents);
        if (computeRewards) {
            storm::models::sparse::StandardRewardModel<ValueType> rewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
            for (auto const& iter : beliefStateMap.left) {
                auto currentBelief = beliefList[iter.first];
                auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
                for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) {
                    // Add the reward
                    rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
                                                     getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
                                                                          currentBelief));
                }
            }
            underApproxMdp.addRewardModel("std", rewardModel);
            underApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
        }
        model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underApproxMdp);
        model->printModelInformationToStream(std::cout);
        statistics.underApproximationBuildTime.stop();
        std::string propertyString;
        if (computeRewards) {
            propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
        } else {
            propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
        }
        std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
        std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
        statistics.underApproximationCheckTime.start();
        std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, false)));
        statistics.underApproximationCheckTime.stop();
        if (storm::utility::resources::isTerminate() && !res) {
            return nullptr;
        }
        STORM_LOG_ASSERT(res, "Result does not exist.");
        res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true)));
        auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
        auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)];
        return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
    */
}

template<typename ValueType, typename RewardModelType>
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
                                                                                    std::set<uint32_t> const& targetObservations, bool min,
                                                                                    bool computeRewards, uint64_t maxModelSize, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds) {
    // Build the belief MDP until enough states are explored.
    //TODO think of other ways to stop exploration besides model size
template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeUnderApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager) {
    statistics.underApproximationBuildTime.start();
    storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
@@ -981,7 +745,7 @@ namespace storm {
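    // Stop exploring from the current belief if the gap between its upper and lower value bound is already below the exploration threshold (the state is marked as truncated).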
    if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
        stopExploration = true;
        explorer.setCurrentStateIsTruncated();
    } else if (explorer.getCurrentNumberOfMdpStates() >= maxModelSize) {
    } else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) {
        stopExploration = true;
        explorer.setCurrentStateIsTruncated();
    }
@@ -1024,249 +788,22 @@ namespace storm {
    explorer.finishExploration();
    statistics.underApproximationBuildTime.stop();
    STORM_PRINT("Under Approximation MDP build took " << statistics.underApproximationBuildTime << " seconds." << std::endl);
    explorer.getExploredMdp()->printModelInformationToStream(std::cout);
    statistics.underApproximationCheckTime.start();
    explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
    statistics.underApproximationCheckTime.stop();
    STORM_PRINT("Time Underapproximation: " << statistics.underApproximationCheckTime << " seconds." << std::endl);
    STORM_PRINT("Under-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
    return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{explorer.getComputedValueAtInitialState(), {}, {}});
    return std::make_shared<ExplorerType>(std::move(explorer));
}

template<typename ValueType, typename RewardModelType>
storm::storage::SparseMatrix<ValueType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>>& transitions) {
    uint_fast64_t currentRow = 0;
    uint_fast64_t currentRowGroup = 0;
    uint64_t nrColumns = transitions.size();
    uint64_t nrRows = 0;
    uint64_t nrEntries = 0;
    for (auto const& actionTransitions : transitions) {
        for (auto const& map : actionTransitions) {
            nrEntries += map.size();
            ++nrRows;
        }
    }
    storm::storage::SparseMatrixBuilder<ValueType> smb(nrRows, nrColumns, nrEntries, true, true);
    for (auto const& actionTransitions : transitions) {
        smb.newRowGroup(currentRow);
        for (auto const& map : actionTransitions) {
            for (auto const& transition : map) {
                smb.addNextValue(currentRow, transition.first, transition.second);
            }
            ++currentRow;
        }
        ++currentRowGroup;
    }
    return smb.build();
}

template<typename ValueType, typename RewardModelType>
uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefIdInVector(
        std::vector<storm::pomdp::Belief<ValueType>> const& grid, uint32_t observation,
        std::map<uint64_t, ValueType>& probabilities) {
    // TODO This one is quite slow
    for (auto const& belief : grid) {
        if (belief.observation == observation) {
            bool same = true;
            for (auto const& probEntry : belief.probabilities) {
                if (probabilities.find(probEntry.first) == probabilities.end()) {
                    same = false;
                    break;
                }
                if (!cc.isEqual(probEntry.second, probabilities[probEntry.first])) {
                    same = false;
                    break;
                }
            }
            if (same) {
                return belief.id;
            }
        }
    }
    return -1;
}

template<typename ValueType, typename RewardModelType>
storm::pomdp::Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getInitialBelief(uint64_t id) {
    STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
                     "POMDP contains more than one initial state");
    STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
                     "POMDP does not contain an initial state");
    std::map<uint64_t, ValueType> distribution;
    uint32_t observation = 0;
    for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
        if (pomdp.getInitialStates()[state] == 1) {
            distribution[state] = storm::utility::one<ValueType>();
            observation = pomdp.getObservation(state);
            break;
        }
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineUnderApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
    // TODO
}
    return storm::pomdp::Belief<ValueType>{id, observation, distribution};
}

template<typename ValueType, typename RewardModelType>
std::pair<std::vector<std::map<uint64_t, ValueType>>, std::vector<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeSubSimplexAndLambdas(
        std::map<uint64_t, ValueType>& probabilities, uint64_t resolution, uint64_t nrStates) {
    //TODO this can also be simplified using the sparse vector interpretation
    // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
    // Variable names are based on the paper
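    // The triangulation yields the vertices of the sub-simplex containing the belief together with coefficients (lambdas) expressing the belief as a convex combination of those grid points.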
    std::vector<ValueType> x(nrStates);
    std::vector<ValueType> v(nrStates);
    std::vector<ValueType> d(nrStates);
    auto convResolution = storm::utility::convertNumber<ValueType>(resolution);
    for (size_t i = 0; i < nrStates; ++i) {
        for (auto const& probEntry : probabilities) {
            if (probEntry.first >= i) {
                x[i] += convResolution * probEntry.second;
            }
        }
        v[i] = storm::utility::floor(x[i]);
        d[i] = x[i] - v[i];
    }
    auto p = storm::utility::vector::getSortedIndices(d);
    std::vector<std::vector<ValueType>> qs(nrStates, std::vector<ValueType>(nrStates));
    for (size_t i = 0; i < nrStates; ++i) {
        if (i == 0) {
            for (size_t j = 0; j < nrStates; ++j) {
                qs[i][j] = v[j];
            }
        } else {
            for (size_t j = 0; j < nrStates; ++j) {
                if (j == p[i - 1]) {
                    qs[i][j] = qs[i - 1][j] + storm::utility::one<ValueType>();
                } else {
                    qs[i][j] = qs[i - 1][j];
                }
            }
        }
    }
    std::vector<std::map<uint64_t, ValueType>> subSimplex(nrStates);
    for (size_t j = 0; j < nrStates; ++j) {
        for (size_t i = 0; i < nrStates - 1; ++i) {
            if (cc.isLess(storm::utility::zero<ValueType>(), qs[j][i] - qs[j][i + 1])) {
                subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution;
            }
        }
        if (cc.isLess(storm::utility::zero<ValueType>(), qs[j][nrStates - 1])) {
            subSimplex[j][nrStates - 1] = qs[j][nrStates - 1] / convResolution;
        }
    }
    std::vector<ValueType> lambdas(nrStates, storm::utility::zero<ValueType>());
    auto sum = storm::utility::zero<ValueType>();
    for (size_t i = 1; i < nrStates; ++i) {
        lambdas[i] = d[p[i - 1]] - d[p[i]];
        sum += d[p[i - 1]] - d[p[i]];
    }
    lambdas[0] = storm::utility::one<ValueType>() - sum;
    return std::make_pair(subSimplex, lambdas);
}
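// Computes, for the given belief and action, the probability of observing each possible observation in the successor state.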
template<typename ValueType, typename RewardModelType>
std::map<uint32_t, ValueType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeObservationProbabilitiesAfterAction(
        storm::pomdp::Belief<ValueType>& belief,
        uint64_t actionIndex) {
    std::map<uint32_t, ValueType> res;
    // the id is not important here as we immediately discard the belief (very hacky, I don't like it either)
    std::map<uint64_t, ValueType> postProbabilities;
    for (auto const& probEntry : belief.probabilities) {
        uint64_t state = probEntry.first;
        auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
        for (auto const& entry : row) {
            if (entry.getValue() > 0) {
                postProbabilities[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
            }
        }
    }
    for (auto const& probEntry : postProbabilities) {
        uint32_t observation = pomdp.getObservation(probEntry.first);
        if (res.count(observation) == 0) {
            res[observation] = probEntry.second;
        } else {
            res[observation] += probEntry.second;
        }
    }
    return res;
}
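// Performs the belief update for the given action and observation (Bayes rule) and returns the id of the resulting belief, reusing an existing id if that belief has been seen before.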
template<typename ValueType, typename RewardModelType>
uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterActionAndObservation(std::vector<storm::pomdp::Belief<ValueType>>& beliefList,
        std::vector<bool>& beliefIsTarget, std::set<uint32_t> const& targetObservations, storm::pomdp::Belief<ValueType>& belief, uint64_t actionIndex,
        uint32_t observation, uint64_t id) {
    std::map<uint64_t, ValueType> distributionAfter;
    for (auto const& probEntry : belief.probabilities) {
        uint64_t state = probEntry.first;
        auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
        for (auto const& entry : row) {
            if (pomdp.getObservation(entry.getColumn()) == observation) {
                distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
            }
        }
    }
    // We have to normalize the distribution
    auto sum = storm::utility::zero<ValueType>();
    for (auto const& entry : distributionAfter) {
        sum += entry.second;
    }
    for (auto const& entry : distributionAfter) {
        distributionAfter[entry.first] /= sum;
    }
    if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) {
        auto res = getBeliefIdInVector(beliefList, observation, distributionAfter);
        return res;
    } else {
        beliefList.push_back(storm::pomdp::Belief<ValueType>{id, observation, distributionAfter});
        beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
        return id;
    }
}
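// Expected immediate reward of taking 'action' in a state distributed according to the given belief.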
template<typename ValueType, typename RewardModelType>
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief) {
    auto result = storm::utility::zero<ValueType>();
    for (auto const& probEntry : belief) {
        result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
    }
    return result;
}

template<typename ValueType, typename RewardModelType>
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief) {
    auto result = storm::utility::zero<ValueType>();
    for (auto const& probEntry : belief.probabilities) {
        result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
    }
    return result;
}

template class ApproximatePOMDPModelchecker<double>;
#ifdef STORM_HAVE_CARL
template class ApproximatePOMDPModelchecker<storm::RationalNumber>;
template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>;
template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;
#endif
}
}
}