@@ -29,13 +29,22 @@ namespace storm {
         namespace modelchecker {
             template<typename PomdpModelType, typename BeliefValueType>
             ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Options::Options() {
-                initialGridResolution = 10;
-                explorationThreshold = storm::utility::zero<ValueType>();
-                doRefinement = true;
-                refinementPrecision = storm::utility::convertNumber<ValueType>(1e-4);
+                discretize = false;
+                unfold = false;
+                refine = false;
+                resolutionInit = 2;
+                resolutionFactor = storm::utility::convertNumber<ValueType, uint64_t>(2);
+                sizeThresholdInit = 0; // automatic
+                sizeThresholdFactor = 4;
+                gapThresholdInit = storm::utility::convertNumber<ValueType>(0.1);
+                gapThresholdFactor = storm::utility::convertNumber<ValueType>(0.25);
+                optimalChoiceValueThresholdInit = storm::utility::convertNumber<ValueType>(1e-3);
+                optimalChoiceValueThresholdFactor = storm::utility::one<ValueType>();
+                obsThresholdInit = storm::utility::convertNumber<ValueType>(0.1);
+                obsThresholdIncrementFactor = storm::utility::convertNumber<ValueType>(0.1);
                 numericPrecision = storm::NumberTraits<ValueType>::IsExact ? storm::utility::zero<ValueType>() : storm::utility::convertNumber<ValueType>(1e-9);
                 cacheSubsimplices = false;
                 beliefMdpSizeThreshold = boost::none;
             }
             template<typename PomdpModelType, typename BeliefValueType>
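For orientation, a minimal usage sketch of the new option set (illustration only, not part of the patch; the constructor call, the enclosing namespaces, and the defaulted BeliefValueType are assumptions, since only the option members and check() appear in this diff):

    using PomdpType = storm::models::sparse::Pomdp<double>;
    ApproximatePOMDPModelchecker<PomdpType>::Options options;
    options.discretize = true;        // build the discretized (grid) over-approximation
    options.unfold = true;            // unfold the belief MDP for the under-approximation
    options.refine = true;            // alternately refine both bounds until a limit is hit
    options.resolutionInit = 4;       // start from a finer grid than the default of 2
    options.sizeThresholdInit = 0;    // 0 = pick the exploration size threshold automatically
    ApproximatePOMDPModelchecker<PomdpType> checker(pomdp, options);  // assumed constructor signature
    auto result = checker.check(formula);                             // pomdp and formula obtained elsewhere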
@@ -69,6 +78,7 @@ namespace storm {
             template<typename PomdpModelType, typename BeliefValueType>
             typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::check(storm::logic::Formula const& formula) {
+                STORM_LOG_ASSERT(options.unfold || options.discretize, "Invoked belief exploration but no task (unfold or discretize) given.");
                 // Reset all collected statistics
                 statistics = Statistics();
                 // Extract the relevant information from the formula
@@ -96,7 +106,7 @@ namespace storm {
                     STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
                 }
-                if (options.doRefinement) {
+                if (options.refine) {
                     refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
                 } else {
                     computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
@@ -126,7 +136,7 @@ namespace storm {
                 // The overapproximation MDP:
                 if (statistics.overApproximationStates) {
                     stream << "# Number of states in the ";
-                    if (options.doRefinement) {
+                    if (options.refine) {
                         stream << "final ";
                     }
                     stream << "grid MDP for the over-approximation: ";
@@ -142,7 +152,7 @@ namespace storm {
                 // The underapproximation MDP:
                 if (statistics.underApproximationStates) {
                     stream << "# Number of states in the ";
-                    if (options.doRefinement) {
+                    if (options.refine) {
                         stream << "final ";
                     }
                     stream << "grid MDP for the under-approximation: ";
@@ -158,28 +168,21 @@ namespace storm {
                 stream << "##########################################" << std::endl;
             }
             template<typename PomdpModelType, typename BeliefValueType>
             void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const& targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
-                if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
-                    STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
-                }
-                uint64_t underApproxSizeThreshold = 0;
-                { // Overapproximation
-                    std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
+                if (options.discretize) {
+                    std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.resolutionInit);
                     auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
                     if (rewardModelName) {
                         manager->setRewardModel(rewardModelName);
                     }
                     auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
                     HeuristicParameters heuristicParameters;
-                    heuristicParameters.gapThreshold = storm::utility::convertNumber<ValueType>(options.explorationThreshold);
-                    heuristicParameters.observationThreshold = storm::utility::zero<ValueType>(); // Not relevant without refinement
-                    heuristicParameters.sizeThreshold = std::numeric_limits<uint64_t>::max();
-                    heuristicParameters.optimalChoiceValueEpsilon = storm::utility::convertNumber<ValueType>(1e-4);
+                    heuristicParameters.gapThreshold = options.gapThresholdInit;
+                    heuristicParameters.observationThreshold = options.obsThresholdInit; // Actually not relevant without refinement
+                    heuristicParameters.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit;
+                    heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
                     buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, observationResolutionVector, manager, approx);
                     if (approx->hasComputedValues()) {
@@ -187,22 +190,23 @@ namespace storm {
                         approx->getExploredMdp()->printModelInformationToStream(std::cout);
                         ValueType& resultValue = min ? result.lowerBound : result.upperBound;
                         resultValue = approx->getComputedValueAtInitialState();
-                        underApproxSizeThreshold = std::max(approx->getExploredMdp()->getNumberOfStates(), underApproxSizeThreshold);
                     }
                 }
-                { // Underapproximation (Uses a fresh Belief manager)
+                if (options.unfold) { // Underapproximation (uses a fresh Belief manager)
                     auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
                     if (rewardModelName) {
                         manager->setRewardModel(rewardModelName);
                     }
                     auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
-                    if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0) {
-                        underApproxSizeThreshold = options.beliefMdpSizeThreshold.get();
-                    }
-                    if (underApproxSizeThreshold == 0) {
-                        underApproxSizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); // Heuristically select this (only relevant if the over-approx could not be build)
-                    }
-                    buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx);
+                    HeuristicParameters heuristicParameters;
+                    heuristicParameters.gapThreshold = options.gapThresholdInit;
+                    heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
+                    heuristicParameters.sizeThreshold = options.sizeThresholdInit;
+                    if (heuristicParameters.sizeThreshold == 0) {
+                        // Select a decent value automatically
+                        heuristicParameters.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation();
+                    }
+                    buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, manager, approx);
                     if (approx->hasComputedValues()) {
                         STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n");
                         approx->getExploredMdp()->printModelInformationToStream(std::cout);
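The automatic choice made when sizeThresholdInit is 0 caps the unfolded belief MDP at the number of POMDP states times the largest number of states sharing a single observation. A tiny worked illustration of that bound (the POMDP figures are made up):

    uint64_t numberOfStates = 200;          // pomdp.getNumberOfStates()
    uint64_t maxStatesPerObservation = 6;   // pomdp.getMaxNrStatesWithSameObservation()
    uint64_t sizeThreshold = numberOfStates * maxStatesPerObservation;  // at most 1200 beliefs explored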
@@ -215,76 +219,91 @@ namespace storm {
             template<typename PomdpModelType, typename BeliefValueType>
             void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const& targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
+                ValueType& overApproxValue = min ? result.lowerBound : result.upperBound;
+                ValueType& underApproxValue = min ? result.upperBound : result.lowerBound;
                 // Set up exploration data
-                std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-                auto overApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
-                auto underApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
+                std::vector<uint64_t> observationResolutionVector;
+                std::shared_ptr<BeliefManagerType> overApproxBeliefManager;
+                std::shared_ptr<ExplorerType> overApproximation;
+                HeuristicParameters overApproxHeuristicPar;
+                if (options.discretize) { // Setup and build first OverApproximation
+                    observationResolutionVector = std::vector<uint64_t>(pomdp.getNrObservations(), options.resolutionInit);
+                    overApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
                     if (rewardModelName) {
                         overApproxBeliefManager->setRewardModel(rewardModelName);
-                        underApproxBeliefManager->setRewardModel(rewardModelName);
                     }
-                // OverApproximaion
-                auto overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
-                HeuristicParameters heuristicParameters;
-                heuristicParameters.gapThreshold = storm::utility::convertNumber<ValueType>(options.explorationThreshold);
-                heuristicParameters.observationThreshold = storm::utility::zero<ValueType>(); // Will be set to lowest observation score automatically
-                heuristicParameters.sizeThreshold = std::numeric_limits<uint64_t>::max();
-                heuristicParameters.optimalChoiceValueEpsilon = storm::utility::convertNumber<ValueType>(1e-4);
-                buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, observationResolutionVector, overApproxBeliefManager, overApproximation);
+                    overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+                    overApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
+                    overApproxHeuristicPar.observationThreshold = options.obsThresholdInit;
+                    overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;
+                    overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
+                    buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation);
                     if (!overApproximation->hasComputedValues()) {
                         return;
                     }
-                ValueType& overApproxValue = min ? result.lowerBound : result.upperBound;
                     overApproxValue = overApproximation->getComputedValueAtInitialState();
+                }
-                // UnderApproximation
-                uint64_t underApproxSizeThreshold;
-                if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0ull) {
-                    underApproxSizeThreshold = options.beliefMdpSizeThreshold.get();
-                } else {
-                    underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates();
+                std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
+                std::shared_ptr<ExplorerType> underApproximation;
+                HeuristicParameters underApproxHeuristicPar;
+                if (options.unfold) { // Setup and build first UnderApproximation
+                    underApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
+                    if (rewardModelName) {
+                        underApproxBeliefManager->setRewardModel(rewardModelName);
+                    }
+                    underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+                    underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
+                    underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
+                    underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;
+                    if (underApproxHeuristicPar.sizeThreshold == 0) {
+                        // Select a decent value automatically
+                        underApproxHeuristicPar.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation();
+                    }
                 }
-                auto underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
-                buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
+                    buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, underApproxHeuristicPar, underApproxBeliefManager, underApproximation);
                     if (!underApproximation->hasComputedValues()) {
                         return;
                     }
-                ValueType& underApproxValue = min ? result.upperBound : result.lowerBound;
                     underApproxValue = underApproximation->getComputedValueAtInitialState();
+                }
-                // ValueType lastMinScore = storm::utility::infinity<ValueType>();
                 // Start refinement
                 statistics.refinementSteps = 0;
-                while (result.diff() > options.refinementPrecision) {
+                STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || options.refinePrecision.is_initialized(), "No termination criterion for refinement given. Consider to specify a steplimit, precisionlimit or timeout");
+                STORM_LOG_WARN_COND(!options.refinePrecision.is_initialized() || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined.");
+                while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps < options.refineStepLimit.get()) && (!options.refinePrecision.is_initialized() || result.diff() > options.refinePrecision.get())) {
                     if (storm::utility::resources::isTerminate()) {
                         break;
                     }
                     ++statistics.refinementSteps.get();
-                    STORM_LOG_INFO("Starting refinement step " << statistics.refinementSteps.get() << ". Current difference between lower and upper bound is " << result.diff() << ".");
+                    STORM_PRINT_AND_LOG("Starting refinement step " << statistics.refinementSteps.get() << ". Current difference between lower and upper bound is " << result.diff() << "." << std::endl);
+                    if (options.discretize) {
                         // Refine over-approximation
                         if (min) {
                             overApproximation->takeCurrentValuesAsLowerBounds();
                         } else {
                             overApproximation->takeCurrentValuesAsUpperBounds();
                         }
-                    heuristicParameters.gapThreshold /= storm::utility::convertNumber<ValueType, uint64_t>(4);
-                    heuristicParameters.sizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates() * 4;
-                    heuristicParameters.observationThreshold += storm::utility::convertNumber<ValueType>(0.1) * (storm::utility::one<ValueType>() - heuristicParameters.observationThreshold);
-                    buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, heuristicParameters, observationResolutionVector, overApproxBeliefManager, overApproximation);
+                        overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
+                        overApproxHeuristicPar.sizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates() * options.sizeThresholdFactor;
+                        overApproxHeuristicPar.observationThreshold += options.obsThresholdIncrementFactor * (storm::utility::one<ValueType>() - overApproxHeuristicPar.observationThreshold);
+                        overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
+                        buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation);
                         if (overApproximation->hasComputedValues()) {
                             overApproxValue = overApproximation->getComputedValueAtInitialState();
                         } else {
                             break;
                         }
+                    }
-                    if (result.diff() > options.refinementPrecision) {
+                    if (options.unfold && (!options.refinePrecision.is_initialized() || result.diff() > options.refinePrecision.get())) {
                         // Refine under-approximation
-                        underApproxSizeThreshold *= 4;
-                        underApproxSizeThreshold = std::max<uint64_t>(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates());
-                        STORM_LOG_DEBUG("Refining under-approximation with size threshold " << underApproxSizeThreshold << ".");
-                        buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
+                        underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
+                        underApproxHeuristicPar.sizeThreshold = underApproximation->getExploredMdp()->getNumberOfStates() * options.sizeThresholdFactor;
+                        underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
+                        buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation);
                         if (underApproximation->hasComputedValues()) {
                             underApproxValue = underApproximation->getComputedValueAtInitialState();
                         } else {
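The update rules above tighten the heuristics geometrically in every refinement step: the gap threshold shrinks by gapThresholdFactor, the size threshold is the previously explored MDP size times sizeThresholdFactor, and the observation threshold moves a fixed fraction of its remaining distance towards one. A self-contained sketch of that schedule (illustration only, not part of the patch) using the default factors from the Options constructor and a made-up starting MDP size:

    #include <cstdint>
    #include <iostream>

    int main() {
        double gapThreshold = 0.1, gapFactor = 0.25;      // gapThresholdInit, gapThresholdFactor
        double obsThreshold = 0.1, obsIncrement = 0.1;    // obsThresholdInit, obsThresholdIncrementFactor
        uint64_t sizeFactor = 4;                          // sizeThresholdFactor
        uint64_t exploredStates = 1000;                   // hypothetical size of the first over-approximation MDP
        for (int step = 1; step <= 3; ++step) {
            gapThreshold *= gapFactor;                             // 0.025, 0.00625, 0.0015625
            uint64_t sizeThreshold = exploredStates * sizeFactor;  // 4000, 16000, 64000
            obsThreshold += obsIncrement * (1.0 - obsThreshold);   // 0.19, 0.271, 0.3439
            std::cout << "step " << step << ": gap<" << gapThreshold << ", size<=" << sizeThreshold << ", obs>=" << obsThreshold << "\n";
            exploredStates *= sizeFactor;  // assume the next exploration roughly fills the budget
        }
        return 0;
    }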
@@ -352,7 +371,7 @@ namespace storm {
             }
             template<typename PomdpModelType, typename BeliefValueType>
-            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
+            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
                 // current maximal resolution (needed for refinement heuristic)
                 uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
@@ -371,9 +390,7 @@ namespace storm {
                     overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true);
                     // We also need to find out which observation resolutions needs refinement.
                     auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution);
-                    ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end());
                     // Potentially increase the observationThreshold so that at least one observation actually gets refinement.
-                    heuristicParameters.observationThreshold = std::max(minRating, heuristicParameters.observationThreshold);
                     refinedObservations = storm::utility::vector::filter<ValueType>(obsRatings, [&heuristicParameters](ValueType const& r) { return r <= heuristicParameters.observationThreshold; });
                     STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
                     for (auto const& obs : refinedObservations) {
@@ -528,11 +545,11 @@ namespace storm {
             }
             template<typename PomdpModelType, typename BeliefValueType>
-            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildUnderApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
+            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildUnderApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
                 statistics.underApproximationBuildTime.start();
-                statistics.underApproximationStateLimit = maxStateCount;
-                if (!underApproximation->hasComputedValues()) {
+                statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold;
+                if (!refine) {
                     // Build a new under approximation
                     if (computeRewards) {
                         underApproximation->startNewExploration(storm::utility::zero<ValueType>());
@@ -545,6 +562,7 @@ namespace storm {
                 }
                 // Expand the beliefs
+                uint64_t newlyExploredStates = 0;
                 while (underApproximation->hasUnexploredState()) {
                     uint64_t currId = underApproximation->exploreNextState();
@@ -554,18 +572,24 @@ namespace storm {
                         underApproximation->addSelfloopTransition();
                     } else {
                         bool stopExploration = false;
-                        if (!underApproximation->currentStateHasOldBehavior()) {
-                            if (storm::utility::abs<ValueType>(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
+                        bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated();
+                        if (!stateAlreadyExplored) {
+                            // Check whether we want to explore the state now!
+                            if (storm::utility::abs<ValueType>(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < heuristicParameters.gapThreshold) {
                                 stopExploration = true;
                                 underApproximation->setCurrentStateIsTruncated();
-                            } else if (underApproximation->getCurrentNumberOfMdpStates() >= maxStateCount) {
+                            } else if (newlyExploredStates >= heuristicParameters.sizeThreshold) {
                                 stopExploration = true;
                                 underApproximation->setCurrentStateIsTruncated();
                             }
                         }
+                        if (!stopExploration) {
+                            // We are going to explore one more state
+                            ++newlyExploredStates;
+                        }
                         for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                             // Always restore old behavior if available
-                            if (underApproximation->currentStateHasOldBehavior()) {
+                            if (stateAlreadyExplored) {
                                 underApproximation->restoreOldBehaviorAtCurrentState(action);
                             } else {
                                 ValueType truncationProbability = storm::utility::zero<ValueType>();