Alexander Bork
5 years ago
5 changed files with 566 additions and 4 deletions
- src/storm-pomdp-cli/storm-pomdp.cpp (16)
- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (419)
- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (118)
- src/storm/models/sparse/Pomdp.cpp (13)
- src/storm/models/sparse/Pomdp.h (2)
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -0,0 +1,419 @@
#include <storm/utility/ConstantsComparator.h>

#include "ApproximatePOMDPModelchecker.h"

#include "storm/utility/vector.h"

namespace storm {
    namespace pomdp {
        namespace modelchecker {
            template<typename ValueType, typename RewardModelType>
            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker() {
                // Intentionally left empty
            }

            template<typename ValueType, typename RewardModelType>
            /*std::unique_ptr<POMDPCheckResult>*/ void
            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbability(
                    storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                    std::set<uint32_t> target_observations, bool min, uint64_t gridResolution) {
                // TODO add timing
                uint64_t maxIterations = 100;
                bool finished = false;
                uint64_t iteration = 0;

                std::vector<Belief<ValueType>> beliefGrid;
                std::vector<bool> beliefIsKnown;
                constructBeliefGrid(pomdp, target_observations, gridResolution, beliefGrid, beliefIsKnown);
                std::map<uint64_t, ValueType> result;
                std::map<uint64_t, ValueType> result_backup;

                std::vector<std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
                std::vector<std::vector<std::map<uint32_t, Belief<ValueType>>>> nextBelieves;

                uint64_t nextId = beliefGrid.size();
                for (size_t i = 0; i < beliefGrid.size(); ++i) {
                    auto currentBelief = beliefGrid[i];
                    bool isTarget = beliefIsKnown[i];
                    if (isTarget) {
                        result.emplace(std::make_pair(currentBelief.id, storm::utility::one<ValueType>()));
                        result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::one<ValueType>()));
                    } else {
                        result.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
                        result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));

                        std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction;
                        std::vector<std::map<uint32_t, Belief<ValueType>>> nextBelievesInAction;

                        uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front());
                        for (uint64_t action = 0; action < numChoices; ++action) {
                            std::map<uint32_t, ValueType> actionObservationProbabilities =
                                    computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action);
                            std::map<uint32_t, Belief<ValueType>> actionObservationBelieves;
                            for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
                                uint32_t observation = iter->first;
                                actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, currentBelief, action, observation, nextId);
                                ++nextId;
                            }
                            observationProbabilitiesInAction.push_back(actionObservationProbabilities);
                            nextBelievesInAction.push_back(actionObservationBelieves);
                        }
                        observationProbabilities.push_back(observationProbabilitiesInAction);
                        nextBelieves.push_back(nextBelievesInAction);
                    }
                }

                // Value Iteration
                while (!finished && iteration < maxIterations) {
                    STORM_LOG_DEBUG("Iteration " << std::to_string(iteration));
                    bool improvement = false;
                    for (size_t i = 0; i < beliefGrid.size(); ++i) {
                        bool isTarget = beliefIsKnown[i];
                        if (!isTarget) {
                            Belief<ValueType> currentBelief = beliefGrid[i];
                            // we can take any state with the observation as they have the same number of choices
                            uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front());

                            // Initialize the values for the value iteration
                            ValueType bestValue = min ? storm::utility::infinity<ValueType>() : -storm::utility::infinity<ValueType>();
                            uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::max();
                            ValueType currentValue;

                            for (uint64_t action = 0; action < numChoices; ++action) {
                                currentValue = storm::utility::zero<ValueType>(); // simply change this for rewards?

                                for (auto iter = observationProbabilities[i][action].begin(); iter != observationProbabilities[i][action].end(); ++iter) {
                                    uint32_t observation = iter->first;
                                    Belief<ValueType> nextBelief = nextBelieves[i][action][observation];

                                    // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
                                    std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp =
                                            computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution);
                                    std::vector<std::vector<ValueType>> subSimplex = temp.first;
                                    std::vector<ValueType> lambdas = temp.second;

                                    ValueType sum = storm::utility::zero<ValueType>();
                                    for (size_t j = 0; j < lambdas.size(); ++j) {
                                        if (lambdas[j] != storm::utility::zero<ValueType>()) {
                                            sum += lambdas[j] * result_backup.at(getBeliefIdInGrid(beliefGrid, observation, subSimplex[j]));
                                        }
                                    }

                                    currentValue += iter->second * sum;
                                }

                                // Update the selected action
                                auto cc = storm::utility::ConstantsComparator<ValueType>();
                                if ((min && cc.isLess(storm::utility::zero<ValueType>(), bestValue - currentValue)) ||
                                    (!min && cc.isLess(storm::utility::zero<ValueType>(), currentValue - bestValue))) {
                                    improvement = true;
                                    bestValue = currentValue;
                                    chosenActionIndex = action;
                                }
                                // TODO tie breaker?
                            }
                            result[currentBelief.id] = bestValue;
                        }
                    }
                    finished = !improvement;
                    // back up the values for the next iteration
                    for (auto iter = result.begin(); iter != result.end(); ++iter) {
                        result_backup[iter->first] = result[iter->first];
                    }
                    ++iteration;
                }

                // maybe change this so the initial Belief always has ID 0
                Belief<ValueType> initialBelief = getInitialBelief(pomdp, nextId);
                ++nextId;

                std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp =
                        computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution);
                std::vector<std::vector<ValueType>> subSimplex = temp.first;
                std::vector<ValueType> lambdas = temp.second;

                ValueType overApprox = storm::utility::zero<ValueType>();
                for (size_t j = 0; j < lambdas.size(); ++j) {
                    if (lambdas[j] != storm::utility::zero<ValueType>()) {
                        overApprox += lambdas[j] * result_backup[getBeliefIdInGrid(beliefGrid, initialBelief.observation, subSimplex[j])];
                    }
                }

                STORM_LOG_DEBUG("Over-Approximation Result: " << overApprox);
            }
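
Editor's note (not part of the commit): the value iteration above implements the grid-based Bellman backup of Lovejoy's approximation scheme. Writing V_n for the values held in result_backup, each non-target grid belief b is updated as

    V_{n+1}(b) = opt_a \sum_z P(z \mid b, a) \sum_j \lambda_j \, V_n(q_j)

where opt is min or max depending on the `min` flag, P(z | b, a) is taken from observationProbabilities, and (q_j, \lambda_j) is the sub-simplex/lambda decomposition of the successor belief b^{a,z} returned by computeSubSimplexAndLambdas. The over-approximation reported at the end interpolates the converged grid values over the sub-simplex containing the initial belief in the same way.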

            template<typename ValueType, typename RewardModelType>
            uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefIdInGrid(
                    std::vector<Belief<ValueType>> &grid, uint32_t observation, std::vector<ValueType> probabilities) {
                for (auto const &belief : grid) {
                    if (belief.observation == observation && probabilities.size() == belief.probabilities.size()) {
                        if (belief.probabilities == probabilities) {
                            STORM_LOG_DEBUG("Found belief with id " << std::to_string(belief.id));
                            return belief.id;
                        }
                    }
                }
                STORM_LOG_DEBUG("Did not find the belief in the grid");
                return -1;
            }

            template<typename ValueType, typename RewardModelType>
            Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getInitialBelief(
                    storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t id) {
                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
                                 "POMDP contains more than one initial state");
                std::vector<ValueType> distribution(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
                uint32_t observation = 0;
                for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
                    if (pomdp.getInitialStates()[state] == 1) {
                        distribution[state] = storm::utility::one<ValueType>();
                        observation = pomdp.getObservation(state);
                    }
                }
                return Belief<ValueType>{id, observation, distribution};
            }

            template<typename ValueType, typename RewardModelType>
            void ApproximatePOMDPModelchecker<ValueType, RewardModelType>::constructBeliefGrid(
                    storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                    std::set<uint32_t> target_observations, uint64_t gridResolution,
                    std::vector<Belief<ValueType>> &grid, std::vector<bool> &beliefIsKnown) {
                bool isTarget;
                uint64_t newId = 0;

                for (uint32_t observation = 0; observation < pomdp.getNrObservations(); ++observation) {
                    std::vector<uint64_t> statesWithObservation = pomdp.getStatesWithObservation(observation);
                    isTarget = target_observations.find(observation) != target_observations.end();

                    // TODO this can probably be condensed
                    if (statesWithObservation.size() == 1) {
                        // If there is only one state with the observation, we can directly add the corresponding belief
                        std::vector<ValueType> distribution(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
                        distribution[statesWithObservation.front()] = storm::utility::one<ValueType>();
                        Belief<ValueType> belief = {newId, observation, distribution};
                        STORM_LOG_TRACE("Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << distribution << "]");
                        grid.push_back(belief);
                        beliefIsKnown.push_back(isTarget);
                        ++newId;
                    } else {
                        // Otherwise we have to enumerate all possible distributions with regards to the grid
                        // helper is used to derive the distribution of the belief
                        std::vector<uint64_t> helper(statesWithObservation.size(), 0);
                        helper[0] = gridResolution;
                        bool done = false;
                        uint64_t index = 0;

                        while (!done) {
                            std::vector<ValueType> distribution(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
                            for (size_t i = 0; i < statesWithObservation.size() - 1; ++i) {
                                distribution[statesWithObservation[i]] = ValueType(double(helper[i] - helper[i + 1]) / gridResolution);
                            }
                            distribution[statesWithObservation.back()] = ValueType(double(helper[statesWithObservation.size() - 1]) / gridResolution);

                            Belief<ValueType> belief = {newId, observation, distribution};
                            STORM_LOG_TRACE("Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << distribution << "]");
                            grid.push_back(belief);
                            beliefIsKnown.push_back(isTarget);
                            if (helper[statesWithObservation.size() - 1] == gridResolution) {
                                // If the last entry of helper is the gridResolution, we have enumerated all necessary distributions
                                done = true;
                            } else {
                                // Update helper by finding the index to increment
                                index = statesWithObservation.size() - 1;
                                while (helper[index] == helper[index - 1]) {
                                    --index;
                                }
                                STORM_LOG_ASSERT(index > 0, "Error in BeliefGrid generation - index wrong");
                                // Increment the value at the index
                                ++helper[index];
                                // Reset all indices greater than the changed one to 0
                                ++index;
                                while (index < statesWithObservation.size()) {
                                    helper[index] = 0;
                                    ++index;
                                }
                            }
                            ++newId;
                        }
                    }
                }
            }
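
Editor's note (not part of the commit): as a small example of the enumeration above, for an observation with three associated states and gridResolution N = 2, the helper vector runs through (2,0,0), (2,1,0), (2,1,1), (2,2,0), (2,2,1), (2,2,2), so the loop adds the six grid beliefs

    (1, 0, 0), (1/2, 1/2, 0), (1/2, 0, 1/2), (0, 1, 0), (0, 1/2, 1/2), (0, 0, 1)

i.e. exactly the distributions over these states whose entries are multiples of 1/N.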

            template<typename ValueType, typename RewardModelType>
            std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>>
            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeSubSimplexAndLambdas(
                    std::vector<ValueType> probabilities, uint64_t resolution) {
                // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
                // Variable names are based on the paper

                std::vector<ValueType> x(probabilities.size(), storm::utility::zero<ValueType>());
                std::vector<ValueType> v(probabilities.size(), storm::utility::zero<ValueType>());
                std::vector<ValueType> d(probabilities.size(), storm::utility::zero<ValueType>());

                for (size_t i = 0; i < probabilities.size(); ++i) {
                    for (size_t j = i; j < probabilities.size(); ++j) {
                        x[i] += storm::utility::convertNumber<ValueType>(resolution) * probabilities[j];
                    }
                    v[i] = storm::utility::floor(x[i]);
                    d[i] = x[i] - v[i];
                }

                auto p = storm::utility::vector::getSortedIndices(d);

                std::vector<std::vector<ValueType>> qs;
                for (size_t i = 0; i < probabilities.size(); ++i) {
                    std::vector<ValueType> q(probabilities.size(), storm::utility::zero<ValueType>());
                    if (i == 0) {
                        for (size_t j = 0; j < probabilities.size(); ++j) {
                            q[j] = v[j];
                        }
                        qs.push_back(q);
                    } else {
                        for (size_t j = 0; j < probabilities.size(); ++j) {
                            if (j == p[i - 1]) {
                                q[j] = qs[i - 1][j] + storm::utility::one<ValueType>();
                            } else {
                                q[j] = qs[i - 1][j];
                            }
                        }
                        qs.push_back(q);
                    }
                }

                std::vector<std::vector<ValueType>> subSimplex;
                for (auto q : qs) {
                    std::vector<ValueType> node;
                    for (size_t i = 0; i < probabilities.size(); ++i) {
                        if (i != probabilities.size() - 1) {
                            node.push_back((q[i] - q[i + 1]) / storm::utility::convertNumber<ValueType>(resolution));
                        } else {
                            node.push_back(q[i] / storm::utility::convertNumber<ValueType>(resolution));
                        }
                    }
                    subSimplex.push_back(node);
                }

                std::vector<ValueType> lambdas(probabilities.size(), storm::utility::zero<ValueType>());
                auto sum = storm::utility::zero<ValueType>();
                for (size_t i = 1; i < probabilities.size(); ++i) {
                    lambdas[i] = d[p[i - 1]] - d[p[i]];
                    sum += d[p[i - 1]] - d[p[i]];
                }

                lambdas[0] = storm::utility::one<ValueType>() - sum;

                // TODO add assertion that we are close enough
                return std::make_pair(subSimplex, lambdas);
            }
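
Editor's note (not part of the commit): a small worked instance of the triangulation above, assuming getSortedIndices orders the d-values decreasingly. For the belief b = (0.4, 0.35, 0.25) and resolution N = 4:

    x = (4, 2.4, 1),  v = (4, 2, 1),  d = (0, 0.4, 0),  p = (1, 0, 2)
    \lambda = (0.6, 0.4, 0)
    q^{(0)} = (4, 2, 1)  ->  vertex (0.5, 0.25, 0.25)
    q^{(1)} = (4, 3, 1)  ->  vertex (0.25, 0.5, 0.25)

and indeed 0.6 * (0.5, 0.25, 0.25) + 0.4 * (0.25, 0.5, 0.25) = (0.4, 0.35, 0.25), so the belief is reproduced as a convex combination of grid vertices with the computed lambdas (the third vertex receives weight 0).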

            template<typename ValueType, typename RewardModelType>
            std::map<uint32_t, ValueType>
            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeObservationProbabilitiesAfterAction(
                    storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, Belief<ValueType> belief,
                    uint64_t actionIndex) {
                std::map<uint32_t, ValueType> res;
                // the id is not important here as we immediately discard the belief (very hacky, I don't like it either)
                std::vector<ValueType> postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex, 0).probabilities;
                for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
                    uint32_t observation = pomdp.getObservation(state);
                    if (postProbabilities[state] != storm::utility::zero<ValueType>()) {
                        if (res.count(observation) == 0) {
                            res[observation] = postProbabilities[state];
                        } else {
                            res[observation] += postProbabilities[state];
                        }
                    }
                }
                return res;
            }

            template<typename ValueType, typename RewardModelType>
            Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterAction(
                    storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, Belief<ValueType> belief,
                    uint64_t actionIndex, uint64_t id) {
                std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
                uint32_t observation = 0;
                for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
                    if (belief.probabilities[state] != storm::utility::zero<ValueType>()) {
                        auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
                        for (auto const &entry : row) {
                            observation = pomdp.getObservation(entry.getColumn());
                            distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
                        }
                    }
                }
                /* Should not be necessary
                // We have to normalize the distribution
                auto sum = storm::utility::zero<ValueType>();
                for (ValueType const &entry : distributionAfter) {
                    sum += entry;
                }
                for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) {
                    distributionAfter[i] /= sum;
                }*/
                return Belief<ValueType>{id, observation, distributionAfter};
            }

            template<typename ValueType, typename RewardModelType>
            Belief<ValueType>
            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterActionAndObservation(
                    storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, Belief<ValueType> belief,
                    uint64_t actionIndex, uint32_t observation, uint64_t id) {
                std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
                for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
                    if (belief.probabilities[state] != storm::utility::zero<ValueType>()) {
                        auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
                        for (auto const &entry : row) {
                            if (pomdp.getObservation(entry.getColumn()) == observation) {
                                distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
                            }
                        }
                    }
                }
                // We have to normalize the distribution
                auto sum = storm::utility::zero<ValueType>();
                for (ValueType const &entry : distributionAfter) {
                    sum += entry;
                }
                for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) {
                    distributionAfter[i] /= sum;
                }
                return Belief<ValueType>{id, observation, distributionAfter};
            }
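
Editor's note (not part of the commit): the two helpers above are the standard POMDP belief update. With transition probabilities P(s' | s, a) taken from the matrix row selected via getChoiceIndex and observation function O, getBeliefAfterAction computes the pre-observation distribution

    b^a(s') = \sum_s b(s) \, P(s' \mid s, a)

computeObservationProbabilitiesAfterAction aggregates it per observation, P(z | b, a) = \sum_{s' : O(s') = z} b^a(s'), and getBeliefAfterActionAndObservation conditions on the observed z and normalizes:

    b^{a,z}(s') = [O(s') = z] \, b^a(s') / P(z \mid b, a)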

            template class ApproximatePOMDPModelchecker<double>;

#ifdef STORM_HAVE_CARL
            //template class ApproximatePOMDPModelchecker<storm::RationalFunction>;
            template class ApproximatePOMDPModelchecker<storm::RationalNumber>;
#endif
        }
    }
}
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -0,0 +1,118 @@
#pragma once

#include <cstdlib>
#include <map>
#include <memory>
#include <set>
#include <vector>

#include "storm/modelchecker/CheckTask.h"
#include "storm/models/sparse/Pomdp.h"
#include "storm/utility/logging.h"

namespace storm {
    namespace pomdp {
        namespace modelchecker {
            class POMDPCheckResult;

            // Structure used to represent a belief
            template<typename ValueType>
            struct Belief {
                uint64_t id;
                uint32_t observation;
                // TODO make this sparse?
                std::vector<ValueType> probabilities;
            };

            template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
            class ApproximatePOMDPModelchecker {
            public:
                explicit ApproximatePOMDPModelchecker();

                /*std::unique_ptr<POMDPCheckResult>*/ void
                computeReachabilityProbability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                               std::set<uint32_t> target_observations, bool min,
                                               uint64_t gridResolution);

                std::unique_ptr<POMDPCheckResult>
                computeReachabilityReward(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                          std::set<uint32_t> target_observations, uint64_t gridResolution);

            private:
                /**
                 * Helper method to build the belief corresponding to the (unique) initial state of the POMDP
                 *
                 * @param pomdp the POMDP
                 * @param id the id given to the initial belief
                 * @return a belief that assigns probability one to the initial state
                 */
                Belief<ValueType>
                getInitialBelief(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t id);

                /**
                 * Helper method to compute the Freudenthal triangulation of a belief, i.e. the vertices of the
                 * sub-simplex the belief lies in and the convex coefficients (lambdas) with respect to these vertices
                 *
                 * @param probabilities the probability distribution of the belief
                 * @param gridResolution the resolution used for the grid
                 * @return a pair of the sub-simplex vertices and the corresponding lambda values
                 */
                std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>>
                computeSubSimplexAndLambdas(std::vector<ValueType> probabilities, uint64_t gridResolution);

                /**
                 * Helper method to construct the grid of Belief states to approximate the POMDP
                 *
                 * @param pomdp the POMDP
                 * @param target_observations the set of target observations
                 * @param gridResolution the resolution used for the grid
                 * @param grid output parameter holding the generated belief grid
                 * @param beliefIsKnown output parameter; true for a grid belief iff its value is known, i.e. it is a target belief
                 */
                void constructBeliefGrid(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                         std::set<uint32_t> target_observations, uint64_t gridResolution,
                                         std::vector<Belief<ValueType>> &grid, std::vector<bool> &beliefIsKnown);

                /**
                 * Helper method to get the probabilities of each observation after performing an action
                 *
                 * @param pomdp the POMDP
                 * @param belief the starting belief
                 * @param actionIndex the index of the action to be performed
                 * @return a mapping from each reachable observation to its probability
                 */
                std::map<uint32_t, ValueType> computeObservationProbabilitiesAfterAction(
                        storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, Belief<ValueType> belief,
                        uint64_t actionIndex);

                /**
                 * Helper method to get the next belief that results from a belief by performing an action and observing an observation
                 *
                 * @param pomdp the POMDP on which the evaluation should be performed
                 * @param belief the starting belief
                 * @param actionIndex the index of the action to be performed
                 * @param observation the observation after the action was performed
                 * @param id the id given to the resulting belief
                 * @return the resulting belief (observation and distribution)
                 */
                Belief<ValueType>
                getBeliefAfterActionAndObservation(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                   Belief<ValueType> belief,
                                                   uint64_t actionIndex, uint32_t observation, uint64_t id);

                /**
                 * Helper method to get the next belief that results from a belief by performing an action
                 *
                 * @param pomdp the POMDP
                 * @param belief the starting belief
                 * @param actionIndex the index of the action to be performed
                 * @param id the id given to the resulting belief
                 * @return the resulting belief before any observation is made
                 */
                Belief<ValueType>
                getBeliefAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                     Belief<ValueType> belief, uint64_t actionIndex, uint64_t id);

                /**
                 * Helper to get the id of a Belief in the grid
                 *
                 * @param grid the belief grid
                 * @param observation the observation of the searched belief
                 * @param probabilities the probability distribution of the searched belief
                 * @return the id of the belief if it is contained in the grid, -1 otherwise
                 */
                uint64_t getBeliefIdInGrid(std::vector<Belief<ValueType>> &grid, uint32_t observation,
                                           std::vector<ValueType> probabilities);
            };
        }
    }
}
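
Editor's note (not part of the commit): a minimal sketch of how the new checker could be invoked, e.g. from the storm-pomdp CLI that this commit also touches. The variable names and the way the POMDP and target observations are obtained are assumptions for illustration only.

    // Hypothetical driver code; assumes `pomdp` is a storm::models::sparse::Pomdp<double>
    // that has already been built and `targetObservations` holds the observation ids
    // of the target states (both obtained elsewhere).
    storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double> checker;
    std::set<uint32_t> targetObservations = {/* observation ids of target states */};
    bool minimize = false;          // maximize the reachability probability
    uint64_t gridResolution = 10;   // finer grids give tighter over-approximations
    checker.computeReachabilityProbability(pomdp, targetObservations, minimize, gridResolution);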