First (re-) implementation of refinement. (probably needs some testing/debugging)

tempestpy_adaptions
Tim Quatmann, 5 years ago
commit c2ddea1480

Changed files:
  1. src/storm-pomdp/builder/BeliefMdpExplorer.h (60 changed lines)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (712 changed lines)
  3. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (141 changed lines)

src/storm-pomdp/builder/BeliefMdpExplorer.h (60 changed lines)

@@ -43,6 +43,10 @@ namespace storm {
}
BeliefMdpExplorer(BeliefMdpExplorer&& other) = default;
BeliefManagerType const& getBeliefManager() const {
return *beliefManager;
}
void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
status = Status::Exploring;
// Reset data from potential previous explorations
@@ -101,7 +105,7 @@ namespace storm {
exploredBeliefIds.clear();
exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false);
exploredMdpTransitions.clear();
exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices);
exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices());
exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices();
mdpActionRewards.clear();
if (exploredMdp->hasRewardModel()) {
@@ -235,7 +239,7 @@ namespace storm {
for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) {
internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue());
// Check whether exploration is needed
auto beliefId = mdpStateToBeliefIdMap[transition.getColumn()];
auto beliefId = getBeliefId(transition.getColumn());
if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state
if (!exploredBeliefIds.get(beliefId)) {
// This belief needs exploration
@@ -397,6 +401,10 @@ namespace storm {
status = Status::ModelChecked;
}
bool hasComputedValues() const {
return status == Status::ModelChecked;
}
std::vector<ValueType> const& getValuesOfExploredMdp() const {
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
return values;
@@ -408,6 +416,51 @@ namespace storm {
return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)];
}
MdpStateType getBeliefId(MdpStateType exploredMdpState) const {
STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status.");
return mdpStateToBeliefIdMap[exploredMdpState];
}
struct SuccessorObservationInformation {
SuccessorObservationInformation(ValueType const& obsProb, ValueType const& maxProb, uint64_t const& count) : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) {
// Intentionally left empty.
}
void join(SuccessorObservationInformation other) {
observationProbability += other.observationProbability;
maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs);
successorWithObsCount += other.successorWithObsCount;
}
ValueType observationProbability; /// The probability we move to the corresponding observation.
ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation.
uint64_t successorWithObsCount; /// The number of successors with this observation
};
void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) {
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior");
uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex;
gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
}
void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) {
STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before");
for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) {
auto const& beliefId = getBeliefId(entry.getColumn());
if (beliefId != beliefManager->noId()) {
auto const& obs = beliefManager->getBeliefObservation(beliefId);
SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1);
auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info);
if (!obsInsertion.second) {
// There already is an entry for this observation, so join the two pieces of information
obsInsertion.first->second.join(info);
}
}
}
}
private:
MdpStateType noState() const {
return std::numeric_limits<MdpStateType>::max();
@@ -438,7 +491,8 @@ namespace storm {
}
MdpStateType getCurrentBeliefId() const {
return mdpStateToBeliefIdMap[getCurrentMdpState()];
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
return getBeliefId(getCurrentMdpState());
}
void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) {

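The new SuccessorObservationInformation plumbing above aggregates, per observation, the total probability mass, the largest single-successor probability, and the number of successors of one MDP choice. The following is a minimal, standalone toy re-implementation of that aggregation, not Storm code; all names and numbers are made up for illustration.

    // Toy re-implementation of the observation aggregation (hypothetical, self-contained).
    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <utility>
    #include <vector>

    struct SuccObsInfo {
        double observationProbability;    // summed probability of reaching this observation
        double maxProbabilityToSuccessor; // largest single-successor probability
        std::uint64_t successorCount;     // number of successors with this observation
        void join(SuccObsInfo const& other) {
            observationProbability += other.observationProbability;
            maxProbabilityToSuccessor = std::max(maxProbabilityToSuccessor, other.maxProbabilityToSuccessor);
            successorCount += other.successorCount;
        }
    };

    int main() {
        // Successors of a single MDP choice as (observation, probability) pairs; numbers are invented.
        std::vector<std::pair<std::uint32_t, double>> successors = {{0, 0.25}, {0, 0.25}, {1, 0.5}};
        std::map<std::uint32_t, SuccObsInfo> gathered;
        for (auto const& entry : successors) {
            SuccObsInfo info{entry.second, entry.second, 1};
            auto insertion = gathered.emplace(entry.first, info);
            if (!insertion.second) {
                insertion.first->second.join(info); // an entry for this observation already exists
            }
        }
        for (auto const& obsInfo : gathered) {
            std::cout << "obs " << obsInfo.first << ": p=" << obsInfo.second.observationProbability
                      << ", maxP=" << obsInfo.second.maxProbabilityToSuccessor
                      << ", count=" << obsInfo.second.successorCount << std::endl;
        }
        return 0;
    }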
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (712 changed lines)

@@ -171,8 +171,9 @@ namespace storm {
if (rewardModelName) {
manager->setRewardModel(rewardModelName);
}
auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager);
if (approx) {
auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, manager, approx);
if (approx->hasComputedValues()) {
STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n");
approx->getExploredMdp()->printModelInformationToStream(std::cout);
ValueType& resultValue = min ? result.lowerBound : result.upperBound;
@@ -185,8 +186,9 @@ namespace storm {
if (rewardModelName) {
manager->setRewardModel(rewardModelName);
}
auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager);
if (approx) {
auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx);
if (approx->hasComputedValues()) {
STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n");
approx->getExploredMdp()->printModelInformationToStream(std::cout);
ValueType& resultValue = min ? result.upperBound : result.lowerBound;
@@ -200,23 +202,27 @@ namespace storm {
// Set up exploration data
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
auto beliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
auto overApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
auto underApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
if (rewardModelName) {
beliefManager->setRewardModel(rewardModelName);
overApproxBeliefManager->setRewardModel(rewardModelName);
underApproxBeliefManager->setRewardModel(rewardModelName);
}
// OverApproximaion
auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager);
if (!overApproximation) {
auto overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, overApproxBeliefManager, overApproximation);
if (!overApproximation->hasComputedValues()) {
return;
}
ValueType& overApproxValue = min ? result.lowerBound : result.upperBound;
overApproxValue = overApproximation->getComputedValueAtInitialState();
// UnderApproximation TODO: use same belief manager?)
uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates();
auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager);
if (!underApproximation) {
// UnderApproximation
uint64_t underApproxSizeThreshold = std::max<uint64_t>(overApproximation->getExploredMdp()->getNumberOfStates(), 10);
auto underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
if (!underApproximation->hasComputedValues()) {
return;
}
ValueType& underApproxValue = min ? result.upperBound : result.lowerBound;
@@ -225,379 +231,165 @@ namespace storm {
// ValueType lastMinScore = storm::utility::infinity<ValueType>();
// Start refinement
statistics.refinementSteps = 0;
ValueType refinementAggressiveness = storm::utility::zero<ValueType>();
while (result.diff() > options.refinementPrecision) {
if (storm::utility::resources::isTerminate()) {
break;
}
// TODO the actual refinement
/*
// choose which observation(s) to refine
std::vector<ValueType> obsAccumulator(pomdp.getNrObservations(), storm::utility::zero<ValueType>());
std::vector<uint64_t> beliefCount(pomdp.getNrObservations(), 0);
bsmap_type::right_map::const_iterator underApproxStateBeliefIter = res->underApproxBeliefStateMap.right.begin();
while (underApproxStateBeliefIter != res->underApproxBeliefStateMap.right.end()) {
auto currentBelief = res->beliefList[underApproxStateBeliefIter->second];
beliefCount[currentBelief.observation] += 1;
bsmap_type::left_const_iterator overApproxBeliefStateIter = res->overApproxBeliefStateMap.left.find(underApproxStateBeliefIter->second);
if (overApproxBeliefStateIter != res->overApproxBeliefStateMap.left.end()) {
// If there is an over-approximate value for the belief, use it
auto diff = res->overApproxMap[overApproxBeliefStateIter->second] - res->underApproxMap[underApproxStateBeliefIter->first];
obsAccumulator[currentBelief.observation] += diff;
} else {
//otherwise, we approximate a value TODO this is critical, we have to think about it
auto overApproxValue = storm::utility::zero<ValueType>();
auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation], pomdp.getNumberOfStates());
auto subSimplex = temp.first;
auto lambdas = temp.second;
for (size_t j = 0; j < lambdas.size(); ++j) {
if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
uint64_t approxId = getBeliefIdInVector(res->beliefList, currentBelief.observation, subSimplex[j]);
bsmap_type::left_const_iterator approxIter = res->overApproxBeliefStateMap.left.find(approxId);
if (approxIter != res->overApproxBeliefStateMap.left.end()) {
overApproxValue += lambdas[j] * res->overApproxMap[approxIter->second];
} else {
overApproxValue += lambdas[j];
}
}
}
obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[underApproxStateBeliefIter->first];
}
++underApproxStateBeliefIter;
}
//for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
// obsAccumulator[i] /= storm::utility::convertNumber<ValueType>(beliefCount[i]);
//}
changedObservations.clear();
//TODO think about some other scoring methods
auto maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end());
//if (cc.isEqual(maxAvgDifference, lastMinScore) || cc.isLess(lastMinScore, maxAvgDifference)) {
lastMinScore = maxAvgDifference;
auto maxRes = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
STORM_PRINT("Set all to " << maxRes + 1 << std::endl)
for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
observationResolutionVector[i] = maxRes + 1;
changedObservations.insert(i);
// Refine over-approximation
refinementAggressiveness *= storm::utility::convertNumber<ValueType>(1.1);
buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, &refinementAggressiveness, observationResolutionVector, overApproxBeliefManager, overApproximation);
if (overApproximation->hasComputedValues()) {
overApproxValue = overApproximation->getComputedValueAtInitialState();
} else {
break;
}
//} else {
// lastMinScore = std::min(maxAvgDifference, lastMinScore);
// STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
// STORM_PRINT("Last Min Score: " << lastMinScore << std::endl)
// //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl)
// for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
//STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i])
// if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) {
//STORM_PRINT(" *** ")
// observationResolutionVector[i] += 1;
// changedObservations.insert(i);
// }
//STORM_PRINT(std::endl)
// }
//}
if (underApproxModelSize < std::numeric_limits<uint64_t>::max() - 101) {
underApproxModelSize += 100;
// Refine under-approximation
underApproxSizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(storm::utility::convertNumber<ValueType>(underApproxSizeThreshold) * (storm::utility::one<ValueType>() + refinementAggressiveness));
underApproxSizeThreshold = std::max<uint64_t>(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates());
buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
if (underApproximation->hasComputedValues()) {
underApproxValue = underApproximation->getComputedValueAtInitialState();
} else {
break;
}
STORM_PRINT(
"==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl)
res = computeRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
res, changedObservations, initialOverApproxMap, initialUnderApproxMap, underApproxModelSize);
//storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot");
STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) ||
cc.isEqual(res->underApproxValue, res->overApproxValue),
"The value for the under-approximation is larger than the value for the over-approximation.");
*/
++statistics.refinementSteps.get();
}
}
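The loop above is the new refinement driver: while the gap between the bounds exceeds the precision, it grows the refinement aggressiveness, rebuilds the over-approximation with refine=true, enlarges the under-approximation size threshold, and rebuilds the under-approximation. Below is a stripped-down, standalone sketch of that control flow only; the Storm types and the bound updates are replaced by toy stand-ins, and in the actual code the aggressiveness is additionally raised inside buildOverApproximation to at least the smallest observation rating.

    // Toy sketch of the refinement driver (hypothetical stand-ins, self-contained).
    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    struct Result {
        double lowerBound;
        double upperBound;
        double diff() const { return upperBound - lowerBound; }
    };

    // Stand-ins for buildOverApproximation / buildUnderApproximation: each call
    // tightens its bound a little and reports whether values could be computed.
    bool buildOver(double aggressiveness, double& upperBound, std::uint64_t& overStates) {
        upperBound -= 0.1 * (1.0 + aggressiveness);
        overStates += 50;
        return true; // real code: overApproximation->hasComputedValues()
    }
    bool buildUnder(std::uint64_t sizeThreshold, double& lowerBound) {
        lowerBound += 0.05;
        return sizeThreshold > 0; // real code: underApproximation->hasComputedValues()
    }

    int main() {
        Result result{0.0, 1.0};
        double const refinementPrecision = 0.2;
        double aggressiveness = 0.0;
        std::uint64_t overStates = 100, underThreshold = 100, steps = 0;
        while (result.diff() > refinementPrecision) {
            aggressiveness = std::min(1.0, aggressiveness * 1.1 + 0.05); // grow aggressiveness
            if (!buildOver(aggressiveness, result.upperBound, overStates)) break;
            // The under-approximation threshold grows with the aggressiveness,
            // but never drops below the current over-approximation size.
            underThreshold = std::max<std::uint64_t>(static_cast<std::uint64_t>(underThreshold * (1.0 + aggressiveness)), overStates);
            if (!buildUnder(underThreshold, result.lowerBound)) break;
            ++steps;
        }
        std::cout << steps << " refinement steps, remaining gap " << result.diff() << std::endl;
        return 0;
    }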
/*!
* Heuristically rates the quality of the approximation described by the given successor observation info.
* Here, 0 means a bad approximation and 1 means a good approximation.
*/
template<typename PomdpModelType, typename BeliefValueType>
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info) {
auto n = storm::utility::convertNumber<ValueType>(info.successorWithObsCount);
auto one = storm::utility::one<ValueType>();
// Create the actual rating for this observation at this choice from the given info
ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
// At this point, obsChoiceRating is the largest triangulation weight (which ranges from 1/n to 1).
// Normalize the rating so that it ranges from 0 to 1, where
// 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
return obsChoiceRating;
}
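For intuition, here is a worked instance of the normalization above (standalone; the numbers are hypothetical): with n = 4 successors sharing the observation, a largest triangulation weight of 1 means the belief coincides with a grid point (rating 1), while a weight of 1/n means it lies in the middle of the simplex (rating 0).

    // Worked example of the observation rating formula (hypothetical numbers).
    #include <iostream>

    int main() {
        double n = 4.0;          // successorWithObsCount
        double maxWeight = 0.7;  // maxProbabilityToSuccessorWithObs / observationProbability
        double rating = (maxWeight * n - 1.0) / (n - 1.0);
        std::cout << "rating = " << rating << std::endl; // prints 0.6
        return 0;
    }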
template<typename PomdpModelType, typename BeliefValueType>
std::vector<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation) {
uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices();
std::vector<ValueType> resultingRatings(pomdp.getNrObservations(), storm::utility::one<ValueType>());
std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
for (uint64_t mdpChoice = 0; mdpChoice < numMdpChoices; ++mdpChoice) {
gatheredSuccessorObservations.clear();
overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
for (auto const& obsInfo : gatheredSuccessorObservations) {
auto const& obs = obsInfo.first;
ValueType obsChoiceRating = rateObservation(obsInfo.second);
// The rating of the observation will be the minimum over all choice-based observation ratings
resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
}
}
return resultingRatings;
}
template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager) {
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given");
STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero<ValueType>(), "Can not refine with negative aggressiveness.");
STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one<ValueType>(), "Refinement with aggressiveness > 1 is invalid.");
statistics.overApproximationBuildTime.start();
storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
if (computeRewards) {
explorer.startNewExploration(storm::utility::zero<ValueType>());
storm::storage::BitVector refinedObservations;
if (!refine) {
// If we build the model from scratch, we first have to setup the explorer for the overApproximation.
if (computeRewards) {
overApproximation->startNewExploration(storm::utility::zero<ValueType>());
} else {
overApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
}
} else {
explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
// If we refine the existing overApproximation, we need to find out which observation resolutions need refinement.
auto obsRatings = getObservationRatings(overApproximation);
ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end());
// Potentially increase the aggressiveness so that at least one observation actually gets refinement.
*refinementAggressiveness = std::max(minRating, *refinementAggressiveness);
refinedObservations = storm::utility::vector::filter<ValueType>(obsRatings, [&refinementAggressiveness](ValueType const& r) { return r <= *refinementAggressiveness;});
STORM_PRINT("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
for (auto const& obs : refinedObservations) {
// Heuristically increment the resolution at the refined observations (also based on the refinementAggressiveness)
ValueType incrementValue = storm::utility::one<ValueType>() + (*refinementAggressiveness) * storm::utility::convertNumber<ValueType>(observationResolutionVector[obs]);
observationResolutionVector[obs] += storm::utility::convertNumber<uint64_t>(storm::utility::ceil(incrementValue));
}
overApproximation->restartExploration();
}
// Expand the beliefs to generate the grid on-the-fly
while (explorer.hasUnexploredState()) {
uint64_t currId = explorer.exploreNextState();
// Start exploration
std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
while (overApproximation->hasUnexploredState()) {
uint64_t currId = overApproximation->exploreNextState();
uint32_t currObservation = beliefManager->getBeliefObservation(currId);
if (targetObservations.count(currObservation) != 0) {
explorer.setCurrentStateIsTarget();
explorer.addSelfloopTransition();
overApproximation->setCurrentStateIsTarget();
overApproximation->addSelfloopTransition();
} else {
bool stopExploration = false;
if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
if (storm::utility::abs<ValueType>(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
stopExploration = true;
explorer.setCurrentStateIsTruncated();
overApproximation->setCurrentStateIsTruncated();
}
for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
ValueType truncationProbability = storm::utility::zero<ValueType>();
ValueType truncationValueBound = storm::utility::zero<ValueType>();
auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
for (auto const& successor : successorGridPoints) {
bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
if (!added) {
STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
// We did not explore this successor state. Get a bound on the "missing" value
truncationProbability += successor.second;
truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first));
}
}
if (stopExploration) {
if (computeRewards) {
explorer.addTransitionsToExtraStates(action, truncationProbability);
} else {
explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
}
}
if (computeRewards) {
// The truncationValueBound will be added on top of the reward introduced by the current belief state.
explorer.computeRewardAtCurrentState(action, truncationValueBound);
}
}
}
if (storm::utility::resources::isTerminate()) {
statistics.overApproximationBuildAborted = true;
break;
}
}
statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates();
if (storm::utility::resources::isTerminate()) {
statistics.overApproximationBuildTime.stop();
return nullptr;
}
explorer.finishExploration();
statistics.overApproximationBuildTime.stop();
statistics.overApproximationCheckTime.start();
explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
statistics.overApproximationCheckTime.stop();
return std::make_shared<ExplorerType>(std::move(explorer));
}
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
/*TODO:
template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<RefinementComponents<ValueType>>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
std::vector<uint64_t> &observationResolutionVector,
bool computeRewards,
std::shared_ptr<RefinementComponents<ValueType>> refinementComponents,
std::set<uint32_t> changedObservations,
boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
uint64_t maxUaModelSize) {
bool initialBoundMapsSet = overApproximationMap && underApproximationMap;
std::map<uint64_t, ValueType> initialOverMap;
std::map<uint64_t, ValueType> initialUnderMap;
if (initialBoundMapsSet) {
initialOverMap = overApproximationMap.value();
initialUnderMap = underApproximationMap.value();
}
// Note that a persistent cache is not support by the current data structure. The resolution for the given belief also has to be stored somewhere to cache effectively
std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> subSimplexCache;
std::map<uint64_t, std::vector<ValueType>> lambdaCache;
// Map to save the weighted values resulting from the initial preprocessing for newly added beliefs / indices in beliefSpace
std::map<uint64_t, ValueType> weightedSumOverMap;
std::map<uint64_t, ValueType> weightedSumUnderMap;
statistics.overApproximationBuildTime.start();
uint64_t nextBeliefId = refinementComponents->beliefList.size();
uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates();
std::set<uint64_t> relevantStates; // The MDP states where the observation has changed
for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) {
auto currentBelief = refinementComponents->beliefList[iter.first];
if (changedObservations.find(currentBelief.observation) != changedObservations.end()) {
relevantStates.insert(iter.second);
}
}
std::set<std::pair<uint64_t, uint64_t>> statesAndActionsToCheck; // The predecessors of states where the observation has changed
for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) {
for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) {
for (typename storm::storage::SparseMatrix<ValueType>::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(
state, row).begin();
itEntry != refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(state, row).end(); ++itEntry) {
if (relevantStates.find(itEntry->getColumn()) != relevantStates.end()) {
statesAndActionsToCheck.insert(std::make_pair(state, row));
break;
}
}
}
}
std::deque<uint64_t> beliefsToBeExpanded;
std::map<std::pair<uint64_t, uint64_t>, std::map<uint64_t, ValueType>> transitionsStateActionPair;
for (auto const &stateActionPair : statesAndActionsToCheck) {
auto currId = refinementComponents->overApproxBeliefStateMap.right.at(stateActionPair.first);
auto action = stateActionPair.second;
std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId],
action);
std::map<uint64_t, ValueType> transitionInActionBelief;
for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
// Expand and triangulate the successor
uint32_t observation = iter->first;
uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget,
targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId);
nextBeliefId = refinementComponents->beliefList.size();
//Triangulate here and put the possibly resulting belief in the grid
std::vector<std::map<uint64_t, ValueType>> subSimplex;
std::vector<ValueType> lambdas;
//TODO add caching
if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) {
subSimplex = subSimplexCache[idNextBelief];
lambdas = lambdaCache[idNextBelief];
} else {
auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities,
observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation],
pomdp.getNumberOfStates());
subSimplex = temp.first;
lambdas = temp.second;
if (options.cacheSubsimplices) {
subSimplexCache[idNextBelief] = subSimplex;
lambdaCache[idNextBelief] = lambdas;
}
}
for (size_t j = 0; j < lambdas.size(); ++j) {
if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]);
if (approxId == uint64_t(-1)) {
// if the triangulated belief was not found in the list, we place it in the grid and add it to the work list
storm::pomdp::Belief<ValueType> gridBelief = {nextBeliefId, observation, subSimplex[j]};
refinementComponents->beliefList.push_back(gridBelief);
refinementComponents->beliefGrid.push_back(gridBelief);
refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
// compute overapproximate value using MDP result map
if (initialBoundMapsSet) {
auto tempWeightedSumOver = storm::utility::zero<ValueType>();
auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
for (uint64_t i = 0; i < subSimplex[j].size(); ++i) {
tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(initialOverMap[i]);
tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(initialUnderMap[i]);
}
weightedSumOverMap[nextBeliefId] = tempWeightedSumOver;
weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder;
// Check whether we expand this state/action pair
// We always expand if we are not doing refinement or if the state was not available in the "old" MDP.
// Otherwise, a heuristic decides.
bool expandStateAction = true;
if (refine && overApproximation->currentStateHasOldBehavior()) {
// Compute a rating of the current state/action pair
ValueType stateActionRating = storm::utility::one<ValueType>();
gatheredSuccessorObservations.clear();
overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations);
for (auto const& obsInfo : gatheredSuccessorObservations) {
if (refinedObservations.get(obsInfo.first)) {
ValueType obsRating = rateObservation(obsInfo.second);
stateActionRating = std::min(stateActionRating, obsRating);
}
beliefsToBeExpanded.push_back(nextBeliefId);
refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId));
transitionInActionBelief[nextStateId] = iter->second * lambdas[j];
++nextBeliefId;
++nextStateId;
} else {
transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j];
}
// Only refine if this rating is below the doubled refinementAggressiveness
expandStateAction = stateActionRating < storm::utility::convertNumber<ValueType>(2.0) * (*refinementAggressiveness);
}
}
}
if (!transitionInActionBelief.empty()) {
transitionsStateActionPair[stateActionPair] = transitionInActionBelief;
}
}
std::set<uint64_t> stoppedExplorationStateSet;
// Expand newly added beliefs
while (!beliefsToBeExpanded.empty()) {
uint64_t currId = beliefsToBeExpanded.front();
beliefsToBeExpanded.pop_front();
bool isTarget = refinementComponents->beliefIsTarget[currId];
if (initialBoundMapsSet &&
cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber<ValueType>(options.explorationThreshold))) {
STORM_PRINT("Stop Exploration in State " << refinementComponents->overApproxBeliefStateMap.left.at(currId) << " with Value " << weightedSumOverMap[currId]
<< std::endl)
transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = {{1, weightedSumOverMap[currId]},
{0, storm::utility::one<ValueType>() -
weightedSumOverMap[currId]}};
stoppedExplorationStateSet.insert(refinementComponents->overApproxBeliefStateMap.left.at(currId));
continue;
}
if (isTarget) {
// Depending on whether we compute rewards, we select the right initial result
// MDP stuff
transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] =
{{refinementComponents->overApproxBeliefStateMap.left.at(currId), storm::utility::one<ValueType>()}};
} else {
uint64_t representativeState = pomdp.getStatesWithObservation(refinementComponents->beliefList[currId].observation).front();
uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
std::vector<ValueType> actionRewardsInState(numChoices);
for (uint64_t action = 0; action < numChoices; ++action) {
std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], action);
std::map<uint64_t, ValueType> transitionInActionBelief;
for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
uint32_t observation = iter->first;
// THIS CALL IS SLOW
// TODO speed this up
uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget,
targetObservations, refinementComponents->beliefList[currId], action, observation,
nextBeliefId);
nextBeliefId = refinementComponents->beliefList.size();
//Triangulate here and put the possibly resulting belief in the grid
std::vector<std::map<uint64_t, ValueType>> subSimplex;
std::vector<ValueType> lambdas;
if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) {
subSimplex = subSimplexCache[idNextBelief];
lambdas = lambdaCache[idNextBelief];
} else {
auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities,
observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation],
pomdp.getNumberOfStates());
subSimplex = temp.first;
lambdas = temp.second;
if (options.cacheSubsimplices) {
subSimplexCache[idNextBelief] = subSimplex;
lambdaCache[idNextBelief] = lambdas;
if (expandStateAction) {
ValueType truncationProbability = storm::utility::zero<ValueType>();
ValueType truncationValueBound = storm::utility::zero<ValueType>();
auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
for (auto const& successor : successorGridPoints) {
bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration);
if (!added) {
STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
// We did not explore this successor state. Get a bound on the "missing" value
truncationProbability += successor.second;
truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first) : overApproximation->computeUpperValueBoundAtBelief(successor.first));
}
}
for (size_t j = 0; j < lambdas.size(); ++j) {
if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]);
if (approxId == uint64_t(-1)) {
// if the triangulated belief was not found in the list, we place it in the grid and add it to the work list
storm::pomdp::Belief<ValueType> gridBelief = {nextBeliefId, observation, subSimplex[j]};
refinementComponents->beliefList.push_back(gridBelief);
refinementComponents->beliefGrid.push_back(gridBelief);
refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
// compute overapproximate value using MDP result map
if (initialBoundMapsSet) {
auto tempWeightedSumOver = storm::utility::zero<ValueType>();
auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
for (uint64_t i = 0; i < subSimplex[j].size(); ++i) {
tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(initialOverMap[i]);
tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(initialUnderMap[i]);
}
weightedSumOverMap[nextBeliefId] = tempWeightedSumOver;
weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder;
}
beliefsToBeExpanded.push_back(nextBeliefId);
refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId));
transitionInActionBelief[nextStateId] = iter->second * lambdas[j];
++nextBeliefId;
++nextStateId;
} else {
transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j];
}
if (stopExploration) {
if (computeRewards) {
overApproximation->addTransitionsToExtraStates(action, truncationProbability);
} else {
overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
}
}
}
if (!transitionInActionBelief.empty()) {
transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), action)] = transitionInActionBelief;
if (computeRewards) {
// The truncationValueBound will be added on top of the reward introduced by the current belief state.
overApproximation->computeRewardAtCurrentState(action, truncationValueBound);
}
} else {
// Do not refine here
overApproximation->restoreOldBehaviorAtCurrentState(action);
}
}
}
@@ -606,173 +398,85 @@ namespace storm {
break;
}
}
statistics.overApproximationStates = nextStateId;
// TODO: Drop unreachable states (sometimes?)
statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates();
if (storm::utility::resources::isTerminate()) {
statistics.overApproximationBuildTime.stop();
// Return the result from the old refinement step
return refinementComponents;
}
storm::models::sparse::StateLabeling mdpLabeling(nextStateId);
mdpLabeling.addLabel("init");
mdpLabeling.addLabel("target");
mdpLabeling.addLabelToState("init", refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId));
mdpLabeling.addLabelToState("target", 1);
uint_fast64_t currentRow = 0;
uint_fast64_t currentRowGroup = 0;
storm::storage::SparseMatrixBuilder<ValueType> smb(0, nextStateId, 0, false, true);
auto oldTransitionMatrix = refinementComponents->overApproxModelPtr->getTransitionMatrix();
smb.newRowGroup(currentRow);
smb.addNextValue(currentRow, 0, storm::utility::one<ValueType>());
++currentRow;
++currentRowGroup;
smb.newRowGroup(currentRow);
smb.addNextValue(currentRow, 1, storm::utility::one<ValueType>());
++currentRow;
++currentRowGroup;
for (uint64_t state = 2; state < nextStateId; ++state) {
smb.newRowGroup(currentRow);
//STORM_PRINT("Loop State: " << state << std::endl)
uint64_t numChoices = pomdp.getNumberOfChoices(
pomdp.getStatesWithObservation(refinementComponents->beliefList[refinementComponents->overApproxBeliefStateMap.right.at(state)].observation).front());
bool isTarget = refinementComponents->beliefIsTarget[refinementComponents->overApproxBeliefStateMap.right.at(state)];
for (uint64_t action = 0; action < numChoices; ++action) {
if (transitionsStateActionPair.find(std::make_pair(state, action)) == transitionsStateActionPair.end()) {
for (auto const &entry : oldTransitionMatrix.getRow(state, action)) {
smb.addNextValue(currentRow, entry.getColumn(), entry.getValue());
}
} else {
for (auto const &iter : transitionsStateActionPair[std::make_pair(state, action)]) {
smb.addNextValue(currentRow, iter.first, iter.second);
}
}
++currentRow;
if (isTarget) {
// If the state is a target, we only have one action, thus we add the target label and stop the iteration
mdpLabeling.addLabelToState("target", state);
break;
}
if (stoppedExplorationStateSet.find(state) != stoppedExplorationStateSet.end()) {
break;
}
}
++currentRowGroup;
}
storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(smb.build(), mdpLabeling);
storm::models::sparse::Mdp<ValueType, RewardModelType> overApproxMdp(modelComponents);
if (computeRewards) {
storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) {
auto currentBelief = refinementComponents->beliefList[iter.first];
auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) {
// Add the reward
mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
currentBelief));
}
}
overApproxMdp.addRewardModel("std", mdpRewardModel);
overApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
return;
}
overApproxMdp.printModelInformationToStream(std::cout);
statistics.overApproximationBuildTime.stop();
STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
std::string propertyString = computeRewards ? "R" : "P";
propertyString += min ? "min" : "max";
propertyString += "=? [F \"target\"]";
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
auto task = storm::api::createTask<ValueType>(property, false);
overApproximation->finishExploration();
statistics.overApproximationBuildTime.stop();
statistics.overApproximationCheckTime.start();
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
statistics.overApproximationCheckTime.stop();
if (storm::utility::resources::isTerminate() && !res) {
return refinementComponents; // Return the result from the previous iteration
}
STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << std::endl)
STORM_LOG_ASSERT(res, "Result not exist.");
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true)));
auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
auto overApprox = overApproxResultMap[refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)];
//auto underApprox = weightedSumUnderMap[initialBelief.id];
auto underApproxComponents = computeUnderapproximation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations,
refinementComponents->initialBeliefId, min, computeRewards, maxUaModelSize);
STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
if (storm::utility::resources::isTerminate() && !underApproxComponents) {
return std::make_unique<RefinementComponents<ValueType>>(
RefinementComponents<ValueType>{modelPtr, overApprox, refinementComponents->underApproxValue, overApproxResultMap, {}, refinementComponents->beliefList, refinementComponents->beliefGrid, refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, {}, refinementComponents->initialBeliefId});
}
STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
return std::make_shared<RefinementComponents<ValueType>>(
RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
underApproxComponents->underApproxMap, refinementComponents->beliefList, refinementComponents->beliefGrid,
refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap,
underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId});
}
*/
}
template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager) {
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
statistics.underApproximationBuildTime.start();
storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
if (computeRewards) {
explorer.startNewExploration(storm::utility::zero<ValueType>());
if (!underApproximation->hasComputedValues()) {
// Build a new under approximation
if (computeRewards) {
underApproximation->startNewExploration(storm::utility::zero<ValueType>());
} else {
underApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
}
} else {
explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
// Restart the building process
underApproximation->restartExploration();
}
// Expand the beliefs to generate the grid on-the-fly
if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
}
while (explorer.hasUnexploredState()) {
uint64_t currId = explorer.exploreNextState();
// Expand the beliefs
while (underApproximation->hasUnexploredState()) {
uint64_t currId = underApproximation->exploreNextState();
uint32_t currObservation = beliefManager->getBeliefObservation(currId);
if (targetObservations.count(currObservation) != 0) {
explorer.setCurrentStateIsTarget();
explorer.addSelfloopTransition();
underApproximation->setCurrentStateIsTarget();
underApproximation->addSelfloopTransition();
} else {
bool stopExploration = false;
if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
stopExploration = true;
explorer.setCurrentStateIsTruncated();
} else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) {
stopExploration = true;
explorer.setCurrentStateIsTruncated();
if (!underApproximation->currentStateHasOldBehavior()) {
if (storm::utility::abs<ValueType>(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
stopExploration = true;
underApproximation->setCurrentStateIsTruncated();
} else if (underApproximation->getCurrentNumberOfMdpStates() >= maxStateCount) {
stopExploration = true;
underApproximation->setCurrentStateIsTruncated();
}
}
for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
ValueType truncationProbability = storm::utility::zero<ValueType>();
ValueType truncationValueBound = storm::utility::zero<ValueType>();
auto successors = beliefManager->expand(currId, action);
for (auto const& successor : successors) {
bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
if (!added) {
STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
// We did not explore this successor state. Get a bound on the "missing" value
truncationProbability += successor.second;
truncationValueBound += successor.second * (min ? explorer.computeUpperValueBoundAtBelief(successor.first) : explorer.computeLowerValueBoundAtBelief(successor.first));
// Always restore old behavior if available
if (underApproximation->currentStateHasOldBehavior()) {
underApproximation->restoreOldBehaviorAtCurrentState(action);
} else {
ValueType truncationProbability = storm::utility::zero<ValueType>();
ValueType truncationValueBound = storm::utility::zero<ValueType>();
auto successors = beliefManager->expand(currId, action);
for (auto const& successor : successors) {
bool added = underApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration);
if (!added) {
STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
// We did not explore this successor state. Get a bound on the "missing" value
truncationProbability += successor.second;
truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first));
}
}
if (stopExploration) {
if (computeRewards) {
underApproximation->addTransitionsToExtraStates(action, truncationProbability);
} else {
underApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
}
}
}
if (stopExploration) {
if (computeRewards) {
explorer.addTransitionsToExtraStates(action, truncationProbability);
} else {
explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
// The truncationValueBound will be added on top of the reward introduced by the current belief state.
underApproximation->computeRewardAtCurrentState(action, truncationValueBound);
}
}
if (computeRewards) {
// The truncationValueBound will be added on top of the reward introduced by the current belief state.
explorer.computeRewardAtCurrentState(action, truncationValueBound);
}
}
}
if (storm::utility::resources::isTerminate()) {
@@ -780,25 +484,19 @@ namespace storm {
break;
}
}
statistics.underApproximationStates = explorer.getCurrentNumberOfMdpStates();
statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates();
if (storm::utility::resources::isTerminate()) {
statistics.underApproximationBuildTime.stop();
return nullptr;
return;
}
explorer.finishExploration();
underApproximation->finishExploration();
statistics.underApproximationBuildTime.stop();
statistics.underApproximationCheckTime.start();
explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
underApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
statistics.underApproximationCheckTime.stop();
return std::make_shared<ExplorerType>(std::move(explorer));
}
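Both build routines use the same truncation bookkeeping: when a successor is not explored, its probability mass is accumulated in truncationProbability and its mass-weighted value bound in truncationValueBound; in the probability case, that mass is then split between the extra target and bottom states. A standalone worked example with hypothetical numbers:

    // Worked example of the truncation bookkeeping (hypothetical numbers).
    #include <iostream>

    int main() {
        // Two unexplored successors of one action: (probability, value bound at that belief).
        double const successors[2][2] = {{0.3, 0.8}, {0.1, 0.5}};
        double truncationProbability = 0.0;
        double truncationValueBound = 0.0;
        for (auto const& successor : successors) {
            truncationProbability += successor[0];
            truncationValueBound += successor[0] * successor[1];
        }
        // Probability case (no rewards): send truncationValueBound to the extra target state
        // and the remaining truncated mass to the extra bottom state.
        double toTarget = truncationValueBound;                         // 0.29
        double toBottom = truncationProbability - truncationValueBound; // 0.11
        std::cout << "to target: " << toTarget << ", to bottom: " << toBottom << std::endl;
        return 0;
    }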
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
// TODO
}
template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>;

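Before the header changes, one more worked example: when an observation is selected for refinement, buildOverApproximation above increases its grid resolution by ceil(1 + aggressiveness * currentResolution). With hypothetical numbers:

    // Worked example of the resolution-increment heuristic (hypothetical numbers).
    #include <cmath>
    #include <cstdint>
    #include <iostream>

    int main() {
        double const refinementAggressiveness = 0.25;
        std::uint64_t resolution = 12; // current resolution of the refined observation
        double const incrementValue = 1.0 + refinementAggressiveness * static_cast<double>(resolution);
        resolution += static_cast<std::uint64_t>(std::ceil(incrementValue)); // 12 + 4 = 16
        std::cout << "new resolution = " << resolution << std::endl;
        return 0;
    }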
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (141 changed lines)

@@ -18,32 +18,6 @@ namespace storm {
namespace modelchecker {
typedef boost::bimap<uint64_t, uint64_t> bsmap_type;
/**
* Struct containing information which is supposed to be persistent over multiple refinement steps
*
*/
template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
struct RefinementComponents {
std::shared_ptr<storm::models::sparse::Model<ValueType, RewardModelType>> overApproxModelPtr;
ValueType overApproxValue;
ValueType underApproxValue;
std::map<uint64_t, ValueType> overApproxMap;
std::map<uint64_t, ValueType> underApproxMap;
std::vector<storm::pomdp::Belief<ValueType>> beliefList;
std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
std::vector<bool> beliefIsTarget;
bsmap_type overApproxBeliefStateMap;
bsmap_type underApproxBeliefStateMap;
uint64_t initialBeliefId;
};
template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
struct UnderApproxComponents {
ValueType underApproxValue;
std::map<uint64_t, ValueType> underApproxMap;
bsmap_type underApproxBeliefStateMap;
};
template<typename PomdpModelType, typename BeliefValueType = typename PomdpModelType::ValueType>
class ApproximatePOMDPModelchecker {
public:
@@ -103,121 +77,16 @@ namespace storm {
/**
* Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties
*/
std::shared_ptr<ExplorerType> computeOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager);
void refineOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
void buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
/**
* Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties
*/
std::shared_ptr<ExplorerType> computeUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager);
void refineUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
#ifdef REMOVE_THIS
/**
* Helper to compute an underapproximation of the reachability property.
* The implemented method unrolls the belief support of the given POMDP up to a given number of belief states.
*
* @param beliefList vector containing already generated beliefs
* @param beliefIsTarget vector containinf for each belief in beliefList true if the belief is a target
* @param targetObservations set of target observations
* @param initialBeliefId Id of the belief corresponding to the POMDP's initial state
* @param min true if minimum value is to be computed
* @param computeReward true if rewards are to be computed
* @param maxModelSize number of states up until which the belief support should be unrolled
* @return struct containing the components generated during the under approximation
*/
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<bool> &beliefIsTarget,
std::set<uint32_t> const &targetObservations,
uint64_t initialBeliefId, bool min, bool computeReward,
uint64_t maxModelSize);
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
std::set<uint32_t> const &targetObservations, bool min, bool computeReward,
uint64_t maxModelSize, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds);
/**
* Constructs the initial belief for the given POMDP
*
* @param pomdp the POMDP
* @param id the id the initial belief is given
* @return a belief representing the initial belief
*/
storm::pomdp::Belief<ValueType>
getInitialBelief(uint64_t id);
/**
* Subroutine to compute the subsimplex a given belief is contained in and the corresponding lambda values necessary for the Freudenthal triangulation
*
* @param probabilities the probability distribution of the belief
* @param gridResolution the resolution used for the belief
* @param nrStates number of states in the POMDP
* @return a pair containing: 1) the subsimplices 2) the lambda values
*/
std::pair<std::vector<std::map<uint64_t, ValueType>>, std::vector<ValueType>>
computeSubSimplexAndLambdas(std::map<uint64_t, ValueType> &probabilities, uint64_t gridResolution, uint64_t nrStates);
void buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
/**
* Helper method to get the probabilities to be in a state with each observation after performing an action
*
* @param belief the belief in which the action is performed
* @param actionIndex the index of the action to be performed
* @return mapping from each observation to the probability to be in a state with that observation after performing the action
*/
std::map<uint32_t, ValueType> computeObservationProbabilitiesAfterAction(storm::pomdp::Belief<ValueType> &belief,
uint64_t actionIndex);
/**
* Helper method to get the id of the next belief that results from a belief by performing an action and observing an observation.
* If the belief does not exist yet, it is created and added to the list of all beliefs
*
* @param beliefList data structure to store all generated beliefs
* @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief
* @param targetObservations set of target observations
* @param belief the starting belief
* @param actionIndex the index of the action to be performed
* @param observation the observation after the action was performed
* @return the resulting belief (observation and distribution)
*/
uint64_t getBeliefAfterActionAndObservation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<bool> &beliefIsTarget,
std::set<uint32_t> const &targetObservations,
storm::pomdp::Belief<ValueType> &belief,
uint64_t actionIndex, uint32_t observation, uint64_t id);
/**
* Helper to get the id of a Belief stored in a given vector structure
*
* @param grid the vector on which the lookup is performed
* @param observation the observation of the belief
* @param probabilities the probability distribution over the POMDP states of the Belief
* @return if the belief was found in the vector, the belief's ID, otherwise -1
*/
uint64_t getBeliefIdInVector(std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
std::map<uint64_t, ValueType> &probabilities);
/**
* Helper method to build the transition matrix from a data structure containing transations
*
* @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability)
* @return sparseMatrix representing the transitions
*/
storm::storage::SparseMatrix<ValueType> buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> &transitions);
/**
* Get the reward for performing an action in a given belief
*
* @param action the index of the action to be performed
* @param belief the belief in which the action is performed
* @return the reward earned by performing the action in the belief
*/
ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief);
ValueType getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief);
#endif //REMOVE_THIS
ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info);
std::vector<ValueType> getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation);
struct Statistics {
Statistics();
