
Changed method signatures to new data structures.

main · Tim Quatmann · 5 years ago
parent commit 71e0654498

Changed files:
1. src/storm-pomdp/builder/BeliefMdpExplorer.h (10 changed lines)
2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (765 changed lines)
3. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (107 changed lines)
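The commit renames the checker's template parameters from <ValueType, RewardModelType> to <PomdpModelType, BeliefValueType> and replaces the POMDPCheckResult return type of check() with a Result struct carrying explicit lower and upper bounds. A minimal sketch of how a call site changes (editor's illustration, not code from the commit; the pomdp and formula objects and the required storm headers are assumed):

    // Before: parameterized by value type (and reward model type), returning a
    // std::unique_ptr<POMDPCheckResult<double>> with over-/underApproxValue fields.
    //   storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double> oldChecker(pomdp);
    //   auto oldResult = oldChecker.check(formula);

    // After: parameterized by the POMDP model type, returning a Result by value.
    storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>> checker(pomdp);
    auto result = checker.check(formula);
    std::cout << "[" << result.lowerBound << ", " << result.upperBound << "], relative gap: " << result.diff(true) << std::endl;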

src/storm-pomdp/builder/BeliefMdpExplorer.h (10 changed lines)

@@ -6,6 +6,7 @@
#include <map>
#include <boost/optional.hpp>
#include "storm-parsers/api/properties.h"
#include "storm/api/properties.h"
#include "storm/api/verification.h"
@@ -13,20 +14,25 @@
#include "storm/utility/macros.h"
#include "storm-pomdp/storage/BeliefManager.h"
#include "storm/utility/SignalHandler.h"
#include "storm/modelchecker/results/CheckResult.h"
#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp"
namespace storm {
namespace builder {
template<typename PomdpType>
template<typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType>
class BeliefMdpExplorer {
public:
typedef typename PomdpType::ValueType ValueType;
typedef storm::storage::BeliefManager<PomdpType> BeliefManagerType;
typedef storm::storage::BeliefManager<PomdpType, BeliefValueType> BeliefManagerType;
typedef typename BeliefManagerType::BeliefId BeliefId;
typedef uint64_t MdpStateType;
BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) {
// Intentionally left empty
}
BeliefMdpExplorer(BeliefMdpExplorer&& other) = default;
void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
// Reset data from potential previous explorations
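BeliefMdpExplorer now takes the belief value type as a second template parameter (defaulting to the POMDP's value type) and gains a defaulted move constructor, which lets the model checker move a finished explorer into a shared_ptr. A rough usage sketch under these assumptions (pomdp, precision, and the two bound vectors are placeholders, not names from the commit):

    using PomdpType = storm::models::sparse::Pomdp<double>;
    using ExplorerType = storm::builder::BeliefMdpExplorer<PomdpType>;  // BeliefValueType defaults to double here

    auto beliefManager = std::make_shared<ExplorerType::BeliefManagerType>(pomdp, precision);
    ExplorerType explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
    explorer.startNewExploration();
    // The new move constructor makes this cheap (as done in ApproximatePOMDPModelchecker.cpp):
    auto explorerPtr = std::make_shared<ExplorerType>(std::move(explorer));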

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (765 changed lines)

@@ -15,13 +15,8 @@
#include "storm/models/sparse/StandardRewardModel.h"
#include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h"
#include "storm/utility/vector.h"
#include "storm/modelchecker/results/CheckResult.h"
#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp"
#include "storm/api/properties.h"
#include "storm/api/export.h"
#include "storm-parsers/api/storm-parsers.h"
#include "storm-pomdp/builder/BeliefMdpExplorer.h"
#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"
@@ -32,8 +27,8 @@
namespace storm {
namespace pomdp {
namespace modelchecker {
template<typename ValueType, typename RewardModelType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::Options::Options() {
template<typename PomdpModelType, typename BeliefValueType>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Options::Options() {
initialGridResolution = 10;
explorationThreshold = storm::utility::zero<ValueType>();
doRefinement = true;
@@ -41,61 +36,78 @@ namespace storm {
numericPrecision = storm::NumberTraits<ValueType>::IsExact ? storm::utility::zero<ValueType>() : storm::utility::convertNumber<ValueType>(1e-9);
cacheSubsimplices = false;
}
template<typename ValueType, typename RewardModelType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) {
template<typename PomdpModelType, typename BeliefValueType>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) {
// Intentionally left empty
}
template<typename PomdpModelType, typename BeliefValueType>
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::diff(bool relative) const {
ValueType diff = upperBound - lowerBound;
if (diff < storm::utility::zero<ValueType>()) {
STORM_LOG_WARN_COND(diff >= 1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << ".");
diff = storm::utility::zero<ValueType >();
}
if (relative && !storm::utility::isZero(upperBound)) {
diff /= upperBound;
}
return diff;
}
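The new Result struct makes both bounds explicit; diff() clamps a slightly negative gap (which can occur numerically) to zero and can report the gap relative to the upper bound. A small illustration with made-up values (editor's sketch, not part of the commit):

    Result result(0.4, 0.5);                    // lowerBound = 0.4, upperBound = 0.5
    ValueType absoluteGap = result.diff();      // 0.1
    ValueType relativeGap = result.diff(true);  // 0.1 / 0.5 = 0.2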
template<typename PomdpModelType, typename BeliefValueType>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) {
// intentionally left empty;
}
template<typename ValueType, typename RewardModelType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp, Options options) : pomdp(pomdp), options(options) {
template<typename PomdpModelType, typename BeliefValueType>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) {
cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(this->options.numericPrecision), false);
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::check(storm::logic::Formula const& formula) {
template<typename PomdpModelType, typename BeliefValueType>
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::check(storm::logic::Formula const& formula) {
// Reset all collected statistics
statistics = Statistics();
std::unique_ptr<POMDPCheckResult<ValueType>> result;
// Extract the relevant information from the formula
auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula);
// Compute some initial bounds on the values for each state of the pomdp
auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]);
if (formulaInfo.isNonNestedReachabilityProbability()) {
// FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
if (!formulaInfo.getSinkStates().empty()) {
auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported");
}
if (options.doRefinement) {
result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
} else {
result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
}
} else if (formulaInfo.isNonNestedExpectedRewardFormula()) {
boost::optional<std::string> rewardModelName;
if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) {
// FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
if (options.doRefinement) {
result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true);
if (formulaInfo.isNonNestedReachabilityProbability()) {
if (!formulaInfo.getSinkStates().empty()) {
auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported");
}
} else {
// FIXME: pick the non-unique reward model here
STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet.");
result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
// Expected reward formula!
rewardModelName = formulaInfo.getRewardModelName();
}
} else {
STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
}
if (options.doRefinement) {
refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
} else {
computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
}
if (storm::utility::resources::isTerminate()) {
statistics.aborted = true;
}
return result;
}
template<typename ValueType, typename RewardModelType>
void ApproximatePOMDPModelchecker<ValueType, RewardModelType>::printStatisticsToStream(std::ostream& stream) const {
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::printStatisticsToStream(std::ostream& stream) const {
stream << "##### Grid Approximation Statistics ######" << std::endl;
stream << "# Input model: " << std::endl;
pomdp.printModelInformationToStream(stream);
@@ -143,114 +155,82 @@ namespace storm {
stream << "##########################################" << std::endl;
}
std::shared_ptr<storm::logic::Formula const> createStandardProperty(bool min, bool computeRewards) {
std::string propertyString = computeRewards ? "R" : "P";
propertyString += min ? "min" : "max";
propertyString += "=? [F \"target\"]";
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
return storm::api::extractFormulasFromProperties(propertyVector).front();
}
template<typename ValueType>
storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property, std::vector<ValueType>&& hintVector) {
//Note: The property should not run out of scope after calling this because the task only stores the property by reference.
// Therefore, this method needs the property by reference (and not const reference)
auto task = storm::api::createTask<ValueType>(property, false);
if (!hintVector.empty()) {
auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
hint.setResultHint(std::move(hintVector));
auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
task.setHint(hintPtr);
}
return task;
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards) {
std::srand(time(NULL));
// Compute easy upper and lower bounds
storm::utility::Stopwatch underlyingWatch(true);
// Compute the results on the underlying MDP as a basic overapproximation
storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling());
// TODO: Is the following really necessary
underlyingMdpLabeling.addLabel("__goal__");
std::vector<uint64_t> goalStates;
for (auto const &targetObs : targetObservations) {
for (auto const &goalState : pomdp.getStatesWithObservation(targetObs)) {
underlyingMdpLabeling.addLabelToState("__goal__", goalState);
}
}
storm::models::sparse::Mdp<ValueType, RewardModelType> underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels());
auto underlyingModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(
std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underlyingMdp));
std::string initPropString = computeRewards ? "R" : "P";
initPropString += min ? "min" : "max";
initPropString += "=? [F \"__goal__\"]";
std::vector<storm::jani::Property> propVector = storm::api::parseProperties(initPropString);
std::shared_ptr<storm::logic::Formula const> underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front();
STORM_PRINT("Underlying MDP" << std::endl)
if (computeRewards) {
underlyingMdp.addRewardModel("std", pomdp.getUniqueRewardModel());
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
}
underlyingMdp.printModelInformationToStream(std::cout);
std::unique_ptr<storm::modelchecker::CheckResult> underlyingRes(
storm::api::verifyWithSparseEngine<ValueType>(underlyingModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
STORM_LOG_ASSERT(underlyingRes, "Result not exist.");
underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true)));
auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
underlyingWatch.stop();
storm::utility::Stopwatch positionalWatch(true);
// we define some positional scheduler for the POMDP as a basic lower bound
storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) {
auto obsStates = pomdp.getStatesWithObservation(obs);
// select a random action for all states with the same observation
uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front());
for (auto const &state : obsStates) {
pomdpScheduler.setChoice(chosenAction, state);
uint64_t underApproxSizeThreshold = 0;
{ // Overapproximation
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
if (rewardModelName) {
manager->setRewardModel(rewardModelName);
}
auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager);
if (approx) {
STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n");
approx->getExploredMdp()->printModelInformationToStream(std::cout);
ValueType& resultValue = min ? result.lowerBound : result.upperBound;
resultValue = approx->getComputedValueAtInitialState();
underApproxSizeThreshold = approx->getExploredMdp()->getNumberOfStates();
}
}
{ // Underapproximation (Uses a fresh Belief manager)
auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
if (rewardModelName) {
manager->setRewardModel(rewardModelName);
}
auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager);
if (approx) {
STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n");
approx->getExploredMdp()->printModelInformationToStream(std::cout);
ValueType& resultValue = min ? result.upperBound : result.lowerBound;
resultValue = approx->getComputedValueAtInitialState();
}
}
auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false);
if (computeRewards) {
underApproxModel->restrictRewardModels({"std"});
}
STORM_PRINT("Random Positional Scheduler" << std::endl)
underApproxModel->printModelInformationToStream(std::cout);
std::unique_ptr<storm::modelchecker::CheckResult> underapproxRes(
storm::api::verifyWithSparseEngine<ValueType>(underApproxModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
STORM_LOG_ASSERT(underapproxRes, "Result not exist.");
underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true)));
auto initialUnderApproxMap = underapproxRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
positionalWatch.stop();
STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // "
<< initialUnderApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl)
STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl)
// Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution.
// This can probably be improved (i.e. resolutions for single belief states)
STORM_PRINT("Initial Resolution: " << options.initialGridResolution << std::endl)
}
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
// Set up exploration data
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
std::set<uint32_t> changedObservations;
uint64_t underApproxModelSize = 200;
uint64_t refinementCounter = 1;
STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl)
std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
{},
{}, underApproxModelSize);
if (res == nullptr) {
statistics.refinementSteps = 0;
return nullptr;
auto beliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
if (rewardModelName) {
beliefManager->setRewardModel(rewardModelName);
}
// OverApproximaion
auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager);
if (!overApproximation) {
return;
}
ValueType lastMinScore = storm::utility::infinity<ValueType>();
while (refinementCounter < 1000 && ((!min && res->overApproxValue - res->underApproxValue > options.refinementPrecision) ||
(min && res->underApproxValue - res->overApproxValue > options.refinementPrecision))) {
ValueType& overApproxValue = min ? result.lowerBound : result.upperBound;
overApproxValue = overApproximation->getComputedValueAtInitialState();
// UnderApproximation TODO: use same belief manager?)
uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates();
auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager);
if (!underApproximation) {
return;
}
ValueType& underApproxValue = min ? result.upperBound : result.lowerBound;
underApproxValue = underApproximation->getComputedValueAtInitialState();
// ValueType lastMinScore = storm::utility::infinity<ValueType>();
// Start refinement
statistics.refinementSteps = 0;
while (result.diff() > options.refinementPrecision) {
if (storm::utility::resources::isTerminate()) {
break;
}
// TODO the actual refinement
/*
// choose which observation(s) to refine
std::vector<ValueType> obsAccumulator(pomdp.getNrObservations(), storm::utility::zero<ValueType>());
std::vector<uint64_t> beliefCount(pomdp.getNrObservations(), 0);
@@ -286,9 +266,9 @@ namespace storm {
}
/*for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
obsAccumulator[i] /= storm::utility::convertNumber<ValueType>(beliefCount[i]);
}*/
//for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
// obsAccumulator[i] /= storm::utility::convertNumber<ValueType>(beliefCount[i]);
//}
changedObservations.clear();
//TODO think about some other scoring methods
@@ -301,21 +281,21 @@ namespace storm {
observationResolutionVector[i] = maxRes + 1;
changedObservations.insert(i);
}
/*} else {
lastMinScore = std::min(maxAvgDifference, lastMinScore);
STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
STORM_PRINT("Last Min Score: " << lastMinScore << std::endl)
//STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl)
for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
//} else {
// lastMinScore = std::min(maxAvgDifference, lastMinScore);
// STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
// STORM_PRINT("Last Min Score: " << lastMinScore << std::endl)
// //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl)
// for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
//STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i])
if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) {
// if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) {
//STORM_PRINT(" *** ")
observationResolutionVector[i] += 1;
changedObservations.insert(i);
}
// observationResolutionVector[i] += 1;
// changedObservations.insert(i);
// }
//STORM_PRINT(std::endl)
}
}*/
// }
//}
if (underApproxModelSize < std::numeric_limits<uint64_t>::max() - 101) {
underApproxModelSize += 100;
}
@@ -327,60 +307,13 @@ namespace storm {
STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) ||
cc.isEqual(res->underApproxValue, res->overApproxValue),
"The value for the under-approximation is larger than the value for the over-approximation.");
++refinementCounter;
}
statistics.refinementSteps = refinementCounter;
if (min) {
return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{res->underApproxValue, res->overApproxValue});
} else {
return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{res->overApproxValue, res->underApproxValue});
}
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
bool computeRewards,
std::vector<ValueType> const& lowerPomdpValueBounds,
std::vector<ValueType> const& upperPomdpValueBounds,
uint64_t maxUaModelSize) {
STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, lowerPomdpValueBounds,
upperPomdpValueBounds, maxUaModelSize);
if (result == nullptr) {
return nullptr;
}
if (min) {
return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->underApproxValue, result->overApproxValue});
} else {
return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->overApproxValue, result->underApproxValue});
*/
++statistics.refinementSteps.get();
}
}
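Most of the per-iteration refinement logic in the new refineReachability is still commented out above; only the step counter is maintained. Condensed to its current control flow (a paraphrase for orientation, not code from the commit; argument names are shortened):

    // 1. Build a belief manager and an initial over-approximation.
    auto overApproximation = computeOverApproximation(targetObservations, min, computeRewards, lower, upper, observationResolutionVector, beliefManager);
    // 2. Use its MDP size as the state budget for the under-approximation.
    auto underApproximation = computeUnderApproximation(targetObservations, min, computeRewards, lower, upper, overApproximation->getExploredMdp()->getNumberOfStates(), beliefManager);
    // 3. Loop until the gap is small enough; the actual grid refinement is still a TODO in this commit.
    while (result.diff() > options.refinementPrecision && !storm::utility::resources::isTerminate()) {
        ++statistics.refinementSteps.get();
    }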
template <typename ValueType, typename BeliefType, typename SummandsType>
ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
ValueType result = storm::utility::zero<ValueType>();
for (auto const& entry : belief) {
result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
}
return result;
}
template<typename ValueType, typename RewardModelType>
std::shared_ptr<RefinementComponents<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
std::vector<uint64_t> &observationResolutionVector,
bool computeRewards,
std::vector<ValueType> const& lowerPomdpValueBounds,
std::vector<ValueType> const& upperPomdpValueBounds,
uint64_t maxUaModelSize) {
auto beliefManager = std::make_shared<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>>(pomdp, options.numericPrecision);
if (computeRewards) {
beliefManager->setRewardModel(); // TODO: get actual name
}
template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager) {
statistics.overApproximationBuildTime.start();
storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
if (computeRewards) {
@@ -390,9 +323,6 @@
}
// Expand the beliefs to generate the grid on-the-fly
if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
}
while (explorer.hasUnexploredState()) {
uint64_t currId = explorer.exploreNextState();
@@ -445,39 +375,20 @@
explorer.finishExploration();
statistics.overApproximationBuildTime.stop();
STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
explorer.getExploredMdp()->printModelInformationToStream(std::cout);
statistics.overApproximationCheckTime.start();
explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
statistics.overApproximationCheckTime.stop();
STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl);
STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
//auto underApprox = weightedSumUnderMap[initialBelief.id];
auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize, lowerPomdpValueBounds, upperPomdpValueBounds);
if (storm::utility::resources::isTerminate() && !underApproxComponents) {
// TODO: return other components needed for refinement.
//return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
//return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
}
STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
/* TODO: return other components needed for refinement.
return std::make_unique<RefinementComponents<ValueType>>(
RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
*/
return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {},
underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
return std::make_shared<ExplorerType>(std::move(explorer));
}
template<typename ValueType, typename RewardModelType>
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
/*TODO:
template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<RefinementComponents<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
std::vector<uint64_t> &observationResolutionVector,
bool computeRewards,
std::shared_ptr<RefinementComponents<ValueType>> refinementComponents,
@@ -504,7 +415,7 @@
uint64_t nextBeliefId = refinementComponents->beliefList.size();
uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates();
std::set<uint64_t> relevantStates;
std::set<uint64_t> relevantStates; // The MDP states where the observation has changed
for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) {
auto currentBelief = refinementComponents->beliefList[iter.first];
if (changedObservations.find(currentBelief.observation) != changedObservations.end()) {
@@ -512,7 +423,7 @@
}
}
std::set<std::pair<uint64_t, uint64_t>> statesAndActionsToCheck;
std::set<std::pair<uint64_t, uint64_t>> statesAndActionsToCheck; // The predecessors of states where the observation has changed
for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) {
for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) {
for (typename storm::storage::SparseMatrix<ValueType>::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(
@@ -536,6 +447,7 @@
action);
std::map<uint64_t, ValueType> transitionInActionBelief;
for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
// Expand and triangulate the successor
uint32_t observation = iter->first;
uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget,
targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId);
@@ -803,160 +715,12 @@
refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap,
underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId});
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min) {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
// return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min) {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
// return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
*/
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<bool> &beliefIsTarget,
std::set<uint32_t> const &targetObservations,
uint64_t initialBeliefId, bool min,
bool computeRewards, uint64_t maxModelSize) {
std::set<uint64_t> visitedBelieves;
std::deque<uint64_t> beliefsToBeExpanded;
bsmap_type beliefStateMap;
std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions = {{{{0, storm::utility::one<ValueType>()}}},
{{{1, storm::utility::one<ValueType>()}}}};
std::vector<uint64_t> targetStates = {1};
uint64_t stateId = 2;
beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, stateId));
++stateId;
uint64_t nextId = beliefList.size();
uint64_t counter = 0;
statistics.underApproximationBuildTime.start();
// Expand the believes
visitedBelieves.insert(initialBeliefId);
beliefsToBeExpanded.push_back(initialBeliefId);
while (!beliefsToBeExpanded.empty()) {
//TODO think of other ways to stop exploration besides model size
auto currentBeliefId = beliefsToBeExpanded.front();
uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front());
// for targets, we only consider one action with one transition
if (beliefIsTarget[currentBeliefId]) {
// add a self-loop to target states
targetStates.push_back(beliefStateMap.left.at(currentBeliefId));
transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one<ValueType>()}}});
} else if (counter > maxModelSize) {
transitions.push_back({{{0, storm::utility::one<ValueType>()}}});
} else {
// Iterate over all actions and add the corresponding transitions
std::vector<std::map<uint64_t, ValueType>> actionTransitionStorage;
//TODO add a way to extract the actions from the over-approx and use them here?
for (uint64_t action = 0; action < numChoices; ++action) {
std::map<uint64_t, ValueType> transitionsInStateWithAction;
std::map<uint32_t, ValueType> observationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currentBeliefId], action);
for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) {
uint32_t observation = iter->first;
uint64_t nextBeliefId = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId],
action,
observation, nextId);
nextId = beliefList.size();
if (visitedBelieves.insert(nextBeliefId).second) {
beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId));
++stateId;
beliefsToBeExpanded.push_back(nextBeliefId);
++counter;
}
transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second;
}
actionTransitionStorage.push_back(transitionsInStateWithAction);
}
transitions.push_back(actionTransitionStorage);
}
beliefsToBeExpanded.pop_front();
if (storm::utility::resources::isTerminate()) {
statistics.underApproximationBuildAborted = true;
break;
}
}
statistics.underApproximationStates = transitions.size();
if (storm::utility::resources::isTerminate()) {
statistics.underApproximationBuildTime.stop();
return nullptr;
}
template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager) {
storm::models::sparse::StateLabeling labeling(transitions.size());
labeling.addLabel("init");
labeling.addLabel("target");
labeling.addLabelToState("init", 0);
for (auto targetState : targetStates) {
labeling.addLabelToState("target", targetState);
}
std::shared_ptr<storm::models::sparse::Model<ValueType, RewardModelType>> model;
auto transitionMatrix = buildTransitionMatrix(transitions);
if (transitionMatrix.getRowCount() == transitionMatrix.getRowGroupCount()) {
transitionMatrix.makeRowGroupingTrivial();
}
storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(transitionMatrix, labeling);
storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(modelComponents);
if (computeRewards) {
storm::models::sparse::StandardRewardModel<ValueType> rewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
for (auto const &iter : beliefStateMap.left) {
auto currentBelief = beliefList[iter.first];
auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) {
// Add the reward
rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
currentBelief));
}
}
underApproxMdp.addRewardModel("std", rewardModel);
underApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
}
model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underApproxMdp);
model->printModelInformationToStream(std::cout);
statistics.underApproximationBuildTime.stop();
std::string propertyString;
if (computeRewards) {
propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
} else {
propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
}
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
statistics.underApproximationCheckTime.start();
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, false)));
statistics.underApproximationCheckTime.stop();
if (storm::utility::resources::isTerminate() && !res) {
return nullptr;
}
STORM_LOG_ASSERT(res, "Result does not exist.");
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true)));
auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)];
return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
std::set<uint32_t> const &targetObservations, bool min,
bool computeRewards, uint64_t maxModelSize, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds) {
// Build the belief MDP until enough states are explored.
//TODO think of other ways to stop exploration besides model size
statistics.underApproximationBuildTime.start();
storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
if (computeRewards) {
@@ -981,7 +745,7 @@ namespace storm {
if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
stopExploration = true;
explorer.setCurrentStateIsTruncated();
} else if (explorer.getCurrentNumberOfMdpStates() >= maxModelSize) {
} else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) {
stopExploration = true;
explorer.setCurrentStateIsTruncated();
}
@@ -1024,249 +788,22 @@ namespace storm {
explorer.finishExploration();
statistics.underApproximationBuildTime.stop();
STORM_PRINT("Under Approximation MDP build took " << statistics.underApproximationBuildTime << " seconds." << std::endl);
explorer.getExploredMdp()->printModelInformationToStream(std::cout);
statistics.underApproximationCheckTime.start();
explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
statistics.underApproximationCheckTime.stop();
STORM_PRINT("Time Underapproximation: " << statistics.underApproximationCheckTime << " seconds." << std::endl);
STORM_PRINT("Under-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{explorer.getComputedValueAtInitialState(), {}, {}});
}
template<typename ValueType, typename RewardModelType>
storm::storage::SparseMatrix<ValueType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> &transitions) {
uint_fast64_t currentRow = 0;
uint_fast64_t currentRowGroup = 0;
uint64_t nrColumns = transitions.size();
uint64_t nrRows = 0;
uint64_t nrEntries = 0;
for (auto const &actionTransitions : transitions) {
for (auto const &map : actionTransitions) {
nrEntries += map.size();
++nrRows;
}
}
storm::storage::SparseMatrixBuilder<ValueType> smb(nrRows, nrColumns, nrEntries, true, true);
for (auto const &actionTransitions : transitions) {
smb.newRowGroup(currentRow);
for (auto const &map : actionTransitions) {
for (auto const &transition : map) {
smb.addNextValue(currentRow, transition.first, transition.second);
}
++currentRow;
}
++currentRowGroup;
}
return smb.build();
}
template<typename ValueType, typename RewardModelType>
uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefIdInVector(
std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
std::map<uint64_t, ValueType> &probabilities) {
// TODO This one is quite slow
for (auto const &belief : grid) {
if (belief.observation == observation) {
bool same = true;
for (auto const &probEntry : belief.probabilities) {
if (probabilities.find(probEntry.first) == probabilities.end()) {
same = false;
break;
}
if (!cc.isEqual(probEntry.second, probabilities[probEntry.first])) {
same = false;
break;
}
}
if (same) {
return belief.id;
}
}
}
return -1;
}
template<typename ValueType, typename RewardModelType>
storm::pomdp::Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getInitialBelief(uint64_t id) {
STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
"POMDP contains more than one initial state");
STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
"POMDP does not contain an initial state");
std::map<uint64_t, ValueType> distribution;
uint32_t observation = 0;
for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
if (pomdp.getInitialStates()[state] == 1) {
distribution[state] = storm::utility::one<ValueType>();
observation = pomdp.getObservation(state);
break;
}
}
return storm::pomdp::Belief<ValueType>{id, observation, distribution};
}
template<typename ValueType, typename RewardModelType>
std::pair<std::vector<std::map<uint64_t, ValueType>>, std::vector<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeSubSimplexAndLambdas(
std::map<uint64_t, ValueType> &probabilities, uint64_t resolution, uint64_t nrStates) {
//TODO this can also be simplified using the sparse vector interpretation
// This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
// Variable names are based on the paper
std::vector<ValueType> x(nrStates);
std::vector<ValueType> v(nrStates);
std::vector<ValueType> d(nrStates);
auto convResolution = storm::utility::convertNumber<ValueType>(resolution);
for (size_t i = 0; i < nrStates; ++i) {
for (auto const &probEntry : probabilities) {
if (probEntry.first >= i) {
x[i] += convResolution * probEntry.second;
}
}
v[i] = storm::utility::floor(x[i]);
d[i] = x[i] - v[i];
}
auto p = storm::utility::vector::getSortedIndices(d);
std::vector<std::vector<ValueType>> qs(nrStates, std::vector<ValueType>(nrStates));
for (size_t i = 0; i < nrStates; ++i) {
if (i == 0) {
for (size_t j = 0; j < nrStates; ++j) {
qs[i][j] = v[j];
}
} else {
for (size_t j = 0; j < nrStates; ++j) {
if (j == p[i - 1]) {
qs[i][j] = qs[i - 1][j] + storm::utility::one<ValueType>();
} else {
qs[i][j] = qs[i - 1][j];
}
}
}
}
std::vector<std::map<uint64_t, ValueType>> subSimplex(nrStates);
for (size_t j = 0; j < nrStates; ++j) {
for (size_t i = 0; i < nrStates - 1; ++i) {
if (cc.isLess(storm::utility::zero<ValueType>(), qs[j][i] - qs[j][i + 1])) {
subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution;
}
}
if (cc.isLess(storm::utility::zero<ValueType>(), qs[j][nrStates - 1])) {
subSimplex[j][nrStates - 1] = qs[j][nrStates - 1] / convResolution;
}
}
std::vector<ValueType> lambdas(nrStates, storm::utility::zero<ValueType>());
auto sum = storm::utility::zero<ValueType>();
for (size_t i = 1; i < nrStates; ++i) {
lambdas[i] = d[p[i - 1]] - d[p[i]];
sum += d[p[i - 1]] - d[p[i]];
}
lambdas[0] = storm::utility::one<ValueType>() - sum;
return std::make_pair(subSimplex, lambdas);
}
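For reference, the removed computeSubSimplexAndLambdas helper above implements the Freudenthal triangulation from Lovejoy's paper. In the notation of that code (belief b over states 0..n-1, resolution N), it computes roughly the following (editor's summary, not text from the commit):

    x_i = N \sum_{j \geq i} b(j), \qquad v_i = \lfloor x_i \rfloor, \qquad d_i = x_i - v_i .

With p the permutation ordering the d_i decreasingly, the simplex vertices are q^{(0)} = v and q^{(k)}_j = q^{(k-1)}_j + [j = p_{k-1}], each mapped back to a belief via b_k(i) = (q^{(k)}_i - q^{(k)}_{i+1}) / N (and b_k(n-1) = q^{(k)}_{n-1} / N). The weights are

    \lambda_k = d_{p_{k-1}} - d_{p_k} \ (k \geq 1), \qquad \lambda_0 = 1 - \sum_{k \geq 1} \lambda_k ,

so that b = \sum_k \lambda_k b_k, i.e. the belief is expressed as a convex combination of grid beliefs.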
template<typename ValueType, typename RewardModelType>
std::map<uint32_t, ValueType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeObservationProbabilitiesAfterAction(
storm::pomdp::Belief<ValueType> &belief,
uint64_t actionIndex) {
std::map<uint32_t, ValueType> res;
// the id is not important here as we immediately discard the belief (very hacky, I don't like it either)
std::map<uint64_t, ValueType> postProbabilities;
for (auto const &probEntry : belief.probabilities) {
uint64_t state = probEntry.first;
auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
for (auto const &entry : row) {
if (entry.getValue() > 0) {
postProbabilities[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
}
}
}
for (auto const &probEntry : postProbabilities) {
uint32_t observation = pomdp.getObservation(probEntry.first);
if (res.count(observation) == 0) {
res[observation] = probEntry.second;
} else {
res[observation] += probEntry.second;
}
}
return res;
return std::make_shared<ExplorerType>(std::move(explorer));
}
template<typename ValueType, typename RewardModelType>
uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterActionAndObservation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<bool> &beliefIsTarget, std::set<uint32_t> const &targetObservations, storm::pomdp::Belief<ValueType> &belief, uint64_t actionIndex,
uint32_t observation, uint64_t id) {
std::map<uint64_t, ValueType> distributionAfter;
for (auto const &probEntry : belief.probabilities) {
uint64_t state = probEntry.first;
auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
for (auto const &entry : row) {
if (pomdp.getObservation(entry.getColumn()) == observation) {
distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
}
}
}
// We have to normalize the distribution
auto sum = storm::utility::zero<ValueType>();
for (auto const &entry : distributionAfter) {
sum += entry.second;
}
for (auto const &entry : distributionAfter) {
distributionAfter[entry.first] /= sum;
}
if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) {
auto res = getBeliefIdInVector(beliefList, observation, distributionAfter);
return res;
} else {
beliefList.push_back(storm::pomdp::Belief<ValueType>{id, observation, distributionAfter});
beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
return id;
}
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
// TODO
}
template<typename ValueType, typename RewardModelType>
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief) {
auto result = storm::utility::zero<ValueType>();
for (auto const &probEntry : belief) {
result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
}
return result;
}
template<typename ValueType, typename RewardModelType>
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief) {
auto result = storm::utility::zero<ValueType>();
for (auto const &probEntry : belief.probabilities) {
result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
}
return result;
}
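For reference (illustrative only), both getRewardAfterAction overloads evaluate the expected immediate reward of action a in belief b,

    r(b, a) = sum_s b(s) * r(s, a)

with r(s, a) read from the POMDP's unique reward model.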
template
class ApproximatePOMDPModelchecker<double>;
#ifdef STORM_HAVE_CARL
template
class ApproximatePOMDPModelchecker<storm::RationalNumber>;
template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>;
template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;
#endif
} }
} }
} }

107
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h

@ -4,6 +4,7 @@
#include "storm/utility/logging.h" #include "storm/utility/logging.h"
#include "storm-pomdp/storage/Belief.h" #include "storm-pomdp/storage/Belief.h"
#include "storm-pomdp/storage/BeliefManager.h" #include "storm-pomdp/storage/BeliefManager.h"
#include "storm-pomdp/builder/BeliefMdpExplorer.h"
#include <boost/bimap.hpp> #include <boost/bimap.hpp>
#include "storm/storage/jani/Property.h" #include "storm/storage/jani/Property.h"
@ -17,12 +18,6 @@ namespace storm {
namespace modelchecker { namespace modelchecker {
typedef boost::bimap<uint64_t, uint64_t> bsmap_type; typedef boost::bimap<uint64_t, uint64_t> bsmap_type;
template<class ValueType>
struct POMDPCheckResult {
ValueType overApproxValue;
ValueType underApproxValue;
};
/** /**
* Struct containing information which is supposed to be persistent over multiple refinement steps * Struct containing information which is supposed to be persistent over multiple refinement steps
* *
@ -49,9 +44,13 @@ namespace storm {
bsmap_type underApproxBeliefStateMap; bsmap_type underApproxBeliefStateMap;
}; };
template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
template<typename PomdpModelType, typename BeliefValueType = typename PomdpModelType::ValueType>
class ApproximatePOMDPModelchecker { class ApproximatePOMDPModelchecker {
public: public:
typedef typename PomdpModelType::ValueType ValueType;
typedef typename PomdpModelType::RewardModelType RewardModelType;
typedef storm::storage::BeliefManager<PomdpModelType, BeliefValueType> BeliefManagerType;
typedef storm::builder::BeliefMdpExplorer<PomdpModelType, BeliefValueType> ExplorerType;
struct Options { struct Options {
Options(); Options();
@ -63,85 +62,60 @@ namespace storm {
bool cacheSubsimplices; /// Enables caching of subsimplices bool cacheSubsimplices; /// Enables caching of subsimplices
}; };
ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp, Options options = Options());
struct Result {
Result(ValueType lower, ValueType upper);
ValueType lowerBound;
ValueType upperBound;
ValueType diff (bool relative = false) const;
};
ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options = Options());
std::unique_ptr<POMDPCheckResult<ValueType>> check(storm::logic::Formula const& formula);
Result check(storm::logic::Formula const& formula);
void printStatisticsToStream(std::ostream& stream) const; void printStatisticsToStream(std::ostream& stream) const;
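A minimal usage sketch of the reworked public interface (illustrative only; pomdp and formula are assumed to be available in the calling code and are not part of this commit):

    // Sketch: 'pomdp' and 'formula' are assumed to exist in the caller.
    storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>> checker(pomdp);
    auto result = checker.check(formula);  // returns lower and upper bounds on the value
    std::cout << "Value in [" << result.lowerBound << ", " << result.upperBound << "], gap " << result.diff() << std::endl;
    checker.printStatisticsToStream(std::cout);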
private: private:
/** /**
* Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop
* Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size
* *
* @param targetObservations the set of observations to be reached
* @param min true if minimum probability is to be computed
* @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
* @param targetObservations set of target observations
* @param min true if minimum value is to be computed
* @param observationResolutionVector vector containing the resolution to be used for each observation
* @param computeRewards true if rewards are to be computed, false if probability is computed
* @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
* @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
* @param maxUaModelSize the maximum size of the underapproximation model to be generated
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/ */
std::unique_ptr<POMDPCheckResult<ValueType>>
refineReachability(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards);
void computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result);
/** /**
* Compute the reachability probability of given target observations on a POMDP for the given resolution only.
* On-the-fly state space generation is used for the overapproximation
* Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop
* *
* @param targetObservations the set of observations to be reached * @param targetObservations the set of observations to be reached
* @param min true if minimum probability is to be computed * @param min true if minimum probability is to be computed
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
* @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/ */
std::unique_ptr<POMDPCheckResult<ValueType>>
computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min);
void refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result);
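How check() dispatches between these two entry points is only visible in .cpp hunks not shown on this page; the following sketch is an assumption (the refine flag and the two bound vectors are placeholder names, not taken from this commit):

    // Sketch under assumptions: 'refine' and the bound vectors are hypothetical names.
    Result result(storm::utility::zero<ValueType>(), storm::utility::one<ValueType>());  // probability case
    if (refine) {
        refineReachability(targetObservations, min, boost::none, lowerPomdpValueBounds, upperPomdpValueBounds, result);
    } else {
        computeReachabilityOTF(targetObservations, min, boost::none, lowerPomdpValueBounds, upperPomdpValueBounds, result);
    }
    STORM_LOG_INFO("Result in [" << result.lowerBound << ", " << result.upperBound << "]");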
/** /**
* Compute the reachability rewards for given target observations on a POMDP for the given resolution only.
* On-the-fly state space generation is used for the overapproximation
*
* @param targetObservations the set of observations to be reached
* @param min true if minimum rewards are to be computed
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
* Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties
*/ */
std::unique_ptr<POMDPCheckResult<ValueType>>
computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min);
std::shared_ptr<ExplorerType> computeOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager);
void refineOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
private:
/** /**
* Helper method to compute the initial step of the refinement loop
*
* @param targetObservations set of target observations
* @param min true if minimum value is to be computed
* @param observationResolutionVector vector containing the resolution to be used for each observation
* @param computeRewards true if rewards are to be computed, false if probability is computed
* @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
* @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
* @param maxUaModelSize the maximum size of the underapproximation model to be generated
* @return struct containing components generated during the computation to be used in later refinement iterations
* Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties
*/ */
std::shared_ptr<RefinementComponents<ValueType>>
computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
std::shared_ptr<ExplorerType> computeUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager);
std::shared_ptr<RefinementComponents<ValueType>>
computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
bool computeRewards, std::shared_ptr<RefinementComponents<ValueType>> refinementComponents,
std::set<uint32_t> changedObservations,
boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
void refineUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
/**
* Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size
*
* @param targetObservations set of target observations
* @param min true if minimum value is to be computed
* @param observationResolutionVector vector containing the resolution to be used for each observation
* @param computeRewards true if rewards are to be computed, false if probability is computed
* @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
* @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
* @param maxUaModelSize the maximum size of the underapproximation model to be generated
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/
std::unique_ptr<POMDPCheckResult<ValueType>>
computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards,
std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
#ifdef REMOVE_THIS
/** /**
* Helper to compute an underapproximation of the reachability property. * Helper to compute an underapproximation of the reachability property.
* The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states.
@ -243,7 +217,8 @@ namespace storm {
*/ */
ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief); ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief);
ValueType getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief); ValueType getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief);
#endif //REMOVE_THIS
struct Statistics { struct Statistics {
Statistics(); Statistics();
boost::optional<uint64_t> refinementSteps; boost::optional<uint64_t> refinementSteps;
@ -262,7 +237,7 @@ namespace storm {
}; };
Statistics statistics; Statistics statistics;
storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp;
PomdpModelType const& pomdp;
Options options; Options options;
storm::utility::ConstantsComparator<ValueType> cc; storm::utility::ConstantsComparator<ValueType> cc;
}; };
