Added a new model checker that computes trivial (but sound) bounds on the values of POMDP states

6 years ago · 37da2b4e1f
3 changed files with 131 additions and 1 deletions
--- a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h
+++ b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h
@ -0,0 +1,115 @@
 #pragma once
 #include "storm-pomdp/analysis/FormulaInformation.h"
 #include "storm/api/verification.h"
 #include "storm/models/sparse/Pomdp.h"
 #include "storm/models/sparse/StandardRewardModel.h"
 #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
 #include "storm/storage/Scheduler.h"
 #include "storm/utility/macros.h"
 #include "storm/exceptions/UnexpectedException.h"
 #include "storm/exceptions/NotSupportedException.h"
 namespace storm {
    namespace pomdp {
        namespace modelchecker {
            /*!
             * Computes trivial (but sound) lower and upper bounds on the value of each state of a POMDP.
             *
             * One bound is obtained by analyzing the underlying MDP (built from the POMDP's transition matrix,
             * state labeling and reward models). The other bound is obtained by analyzing the model that
             * results from applying a heuristically guessed scheduler, which assigns the same distribution
             * over (local) choices to all states that share an observation.
             *
             * @tparam PomdpType The type of the considered POMDP. It has to provide a nested ValueType.
             */
            template <typename PomdpType>
            class TrivialPomdpValueBoundsModelChecker {
            public:
                typedef typename PomdpType::ValueType ValueType;

                /*!
                 * Creates the model checker for the given POMDP.
                 * @param pomdp The considered POMDP. Only a reference is stored, i.e., the POMDP has to outlive this object.
                 */
                TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) {
                    // Intentionally left empty
                }

                /*!
                 * The computed bounds. Both vectors are taken from state-indexed model checking results.
                 */
                struct ValueBounds {
                    std::vector<ValueType> lower;
                    std::vector<ValueType> upper;
                };

                /*!
                 * Computes the value bounds for the given formula. The required formula information is derived from the formula.
                 * @param formula The considered formula.
                 * @return The lower and upper bounds.
                 */
                ValueBounds getValueBounds(storm::logic::Formula const& formula) {
                    return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula));
                }

                /*!
                 * Computes the value bounds for the given formula.
                 * @param formula The considered formula.
                 * @param info Information about the formula (property type, reward model name, optimization direction).
                 * @return The lower and upper bounds.
                 * @throws storm::exceptions::NotSupportedException if the formula is neither a non-nested reachability
                 *         probability formula nor a non-nested expected reward formula.
                 * @throws storm::exceptions::UnexpectedException if model checking yields no result or a result of an unexpected type.
                 */
                ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) {
                    STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis.");

                    // Compute the values on the fully observable MDP
                    // We need an actual MDP here so that the apply scheduler method below will work.
                    // Also, the api call in the next line will require a copy anyway.
                    auto underlyingMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels());
                    auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(underlyingMdp, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
                    STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained.");
                    STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
                    std::vector<ValueType> fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());

                    // Create some positional scheduler for the POMDP
                    storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());

                    // For each state, we heuristically find a good distribution over output actions.
                    // To this end, we first compute the value of each choice w.r.t. the values obtained for the fully observable MDP.
                    std::vector<ValueType> fullyObservableChoiceValues(pomdp.getNumberOfChoices());
                    if (info.isNonNestedExpectedRewardFormula()) {
                        std::vector<ValueType> actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix());
                        pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues, &actionBasedRewards);
                    } else {
                        pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues);
                    }

                    auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
                    ValueType const ratingThreshold = storm::utility::convertNumber<ValueType>(0.5);
                    for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) {
                        auto obsStates = pomdp.getStatesWithObservation(obs);
                        storm::storage::Distribution<ValueType, uint_fast64_t> choiceDistribution;
                        bool foundGoodChoice = false;
                        // NOTE(review): the distribution is indexed by local choice indices and shared among all states with
                        // this observation, i.e., these states are assumed to have the same number of choices.
                        uint_fast64_t numLocalChoices = 0;
                        for (auto const& state : obsStates) {
                            ValueType const& stateValue = fullyObservableResult[state];
                            STORM_LOG_ASSERT(stateValue >= storm::utility::zero<ValueType>(), "Expected a non-negative state value.");
                            numLocalChoices = choiceIndices[state + 1] - choiceIndices[state];
                            for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) {
                                ValueType const& choiceValue = fullyObservableChoiceValues[choice];
                                STORM_LOG_ASSERT(choiceValue >= storm::utility::zero<ValueType>(), "Expected a non-negative choice value.");
                                // Rate this choice by considering the relative difference between the choice value and the (optimal) state value
                                // Equal values (including two infinite values) are rated with zero without doing any arithmetic.
                                ValueType choiceRating = storm::utility::zero<ValueType>();
                                if (stateValue != choiceValue) {
                                    bool const choiceIsLarger = stateValue < choiceValue;
                                    ValueType const& larger = choiceIsLarger ? choiceValue : stateValue;
                                    ValueType const& smaller = choiceIsLarger ? stateValue : choiceValue;
                                    choiceRating = larger - smaller;
                                    if (!storm::utility::isZero(larger)) {
                                        if (choiceRating == larger) {
                                            // The quotient is exactly one. Not dividing here avoids inf/inf in case the larger value is infinite.
                                            choiceRating = storm::utility::one<ValueType>();
                                        } else {
                                            choiceRating /= larger;
                                        }
                                    }
                                }
                                STORM_LOG_ASSERT(choiceRating <= storm::utility::one<ValueType>(), "Expected a choice rating of at most one.");
                                STORM_LOG_ASSERT(choiceRating >= storm::utility::zero<ValueType>(), "Expected a non-negative choice rating.");
                                // choiceRating = 0 is a very good choice, choiceRating = 1 is a very bad choice
                                if (choiceRating <= ratingThreshold) {
                                    choiceDistribution.addProbability(choice - choiceIndices[state], storm::utility::one<ValueType>() - choiceRating);
                                    foundGoodChoice = true;
                                }
                            }
                        }
                        if (!foundGoodChoice) {
                            // No choice was rated good enough. Fall back to a uniform distribution over all local choices
                            // so that the states do not get assigned an empty distribution.
                            for (uint_fast64_t localChoice = 0; localChoice < numLocalChoices; ++localChoice) {
                                choiceDistribution.addProbability(localChoice, storm::utility::one<ValueType>());
                            }
                        }
                        choiceDistribution.normalize();
                        for (auto const& state : obsStates) {
                            pomdpScheduler.setChoice(choiceDistribution, state);
                        }
                    }

                    // Analyze the model that results from applying the guessed scheduler
                    auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false);
                    auto resultPtr2 = storm::api::verifyWithSparseEngine<ValueType>(scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
                    STORM_LOG_THROW(resultPtr2, storm::exceptions::UnexpectedException, "No check result obtained.");
                    STORM_LOG_THROW(resultPtr2->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
                    std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr2->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());

                    // Finally prepare the result
                    // When minimizing, the fully observable values are the lower bound; otherwise they are the upper bound.
                    ValueBounds result;
                    if (info.minimize()) {
                        result.lower = std::move(fullyObservableResult);
                        result.upper = std::move(pomdpSchedulerResult);
                    } else {
                        result.lower = std::move(pomdpSchedulerResult);
                        result.upper = std::move(fullyObservableResult);
                    }
                    return result;
                }

            private:
                // The considered POMDP (not owned).
                PomdpType const& pomdp;
            };
        }
    }
 }
--- a/src/storm/storage/Distribution.cpp
+++ b/src/storm/storage/Distribution.cpp
@ -166,7 +166,17 @@ namespace storm {
            }
        }
        template<typename ValueType, typename StateType>
        void Distribution<ValueType, StateType>::normalize() {
            // Sum up all values currently stored in the distribution.
            ValueType sum = storm::utility::zero<ValueType>();
            for (auto const& entry: distribution) {
                sum += entry.second;
            }
            // A distribution whose values sum up to zero (e.g. an empty one) can not be normalized.
            // It is left unchanged to avoid a division by zero.
            if (sum == storm::utility::zero<ValueType>()) {
                return;
            }
            // Scale each value so that the values sum up to one.
            for (auto& entry: distribution) {
                entry.second /= sum;
            }
        }
        // Explicit instantiations of the distribution and its output operator for double values.
        template class Distribution<double>;
        template std::ostream& operator<<(std::ostream& out, Distribution<double> const& distribution);
--- a/src/storm/storage/Distribution.h
+++ b/src/storm/storage/Distribution.h
@ -144,6 +144,11 @@ namespace storm {
             */
            ValueType getProbability(StateType const& state) const;
            /*!
             * Normalizes the distribution such that the values sum up to one.
             * To this end, each stored value is divided by the sum of all stored values.
             */
            void normalize();
        private:
            // A list of states and the probabilities that are assigned to them.
            container_type distribution;