
WIP Sound LRA for games

Branch: main
Commit 26094b08ce by Fabian Russold, 5 months ago (committed by sp)

Changed files:
  1. src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.cpp (+271)
  2. src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.h (+154)
  3. src/storm/modelchecker/rpatl/SparseSmgRpatlModelChecker.cpp (8 changes)

src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.cpp (+271)

@@ -0,0 +1,271 @@
#include "SparseSmgLraHelper.h"
#include "storm/storage/MaximalEndComponent.h"
#include "storm/storage/StronglyConnectedComponent.h"
#include "storm/utility/graph.h"
#include "storm/utility/vector.h"
#include "storm/utility/macros.h"
#include "storm/utility/SignalHandler.h"
#include "storm/environment/solver/SolverEnvironment.h"
#include "storm/environment/solver/LongRunAverageSolverEnvironment.h"
#include "storm/environment/solver/MinMaxSolverEnvironment.h"
#include "storm/environment/solver/MultiplierEnvironment.h"
#include "storm/environment/solver/GameSolverEnvironment.h"
#include "modelchecker/helper/infinitehorizon/SparseNondeterministicInfiniteHorizonHelper.h"
#include "storm/exceptions/UnmetRequirementException.h"
namespace storm {
namespace modelchecker {
namespace helper {
namespace internal {
template <typename ValueType>
SparseSmgLraHelper<ValueType>::SparseSmgLraHelper(storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::BitVector const statesOfCoalition) : _transitionMatrix(transitionMatrix), _statesOfCoalition(statesOfCoalition), _x1IsCurrent(false), _x1IsCurrentStrategyVI(false) {
}
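// Entry point for the sound LRA computation: assembles the state and action reward getters from the reward model and runs the bounded value iteration below.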
template <typename ValueType>
std::vector<ValueType> SparseSmgLraHelper<ValueType>::computeLongRunAverageRewardsSound(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel) {
STORM_LOG_DEBUG("Transition Matrix:\n" << _transitionMatrix);
std::vector<ValueType> result;
std::vector<ValueType> stateRewardsGetter;
if (rewardModel.hasStateRewards()) {
stateRewardsGetter = rewardModel.getStateRewardVector();
} else {
// Without state rewards, use a zero vector of matching size, since it is added entrywise to xNew() in performValueIteration.
stateRewardsGetter = std::vector<ValueType>(_transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());
}
ValueGetter actionRewardsGetter;
if (rewardModel.hasStateActionRewards() || rewardModel.hasTransitionRewards()) {
if (rewardModel.hasTransitionRewards()) {
actionRewardsGetter = [&] (uint64_t globalChoiceIndex) { return rewardModel.getStateActionAndTransitionReward(globalChoiceIndex, this->_transitionMatrix); };
} else {
actionRewardsGetter = [&] (uint64_t globalChoiceIndex) { return rewardModel.getStateActionReward(globalChoiceIndex); };
}
} else {
actionRewardsGetter = [] (uint64_t) { return storm::utility::zero<ValueType>(); };
}
STORM_LOG_DEBUG("rewards: " << rewardModel.getStateRewardVector());
prepareMultiplier(env, rewardModel);
performValueIteration(env, rewardModel, stateRewardsGetter, actionRewardsGetter, result);
return result;
}
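// Sound value iteration: each round performs one multiplier step to obtain current choices for both players, then fixes each player's choices in turn and solves the induced MDP (via SparseNondeterministicInfiniteHorizonHelper) for the opponent's best response. The two results give monotonically improving lower and upper bounds on the game's LRA values; the loop stops once checkConvergence accepts the bounds.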
template <typename ValueType>
void SparseSmgLraHelper<ValueType>::performValueIteration(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel, std::vector<ValueType> const& stateValueGetter, ValueGetter const& actionValueGetter, std::vector<ValueType>& result, std::vector<uint64_t>* choices, std::vector<ValueType>* choiceValues)
{
std::vector<uint64_t> choicesForStrategies = std::vector<uint64_t>(_transitionMatrix.getRowGroupCount(), 0);
ValueType precision = storm::utility::convertNumber<ValueType>(env.solver().game().getPrecision());
do
{
_x1IsCurrent = !_x1IsCurrent;
// Convergent recommender procedure
_multiplier->multiplyAndReduce(env, _optimizationDirection, xOld(), nullptr, xNew(), &choicesForStrategies, &_statesOfCoalition);
for (size_t i = 0; i < xNew().size(); i++)
{
xNew()[i] = xNew()[i] + stateValueGetter[i];
}
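// Fix, for each player in turn, the choices taken in this iteration; restricting the transition matrix to these rows induces one MDP per player.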
storm::storage::BitVector fixedMaxStrat = getStrategyFixedBitVec(choicesForStrategies, MinMaxStrategy::MaxStrategy);
storm::storage::BitVector fixedMinStrat = getStrategyFixedBitVec(choicesForStrategies, MinMaxStrategy::MinStrategy);
storm::storage::SparseMatrix<ValueType> restrictedMaxMatrix = _transitionMatrix.restrictRows(fixedMaxStrat);
storm::storage::SparseMatrix<ValueType> restrictedMinMatrix = _transitionMatrix.restrictRows(fixedMinStrat);
// Compute new bounds: fixing the maximizer's strategy and letting the opponent respond optimally yields a lower bound on the game value; fixing the minimizer's strategy yields an upper bound.
storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> MaxSolver(restrictedMaxMatrix);
MaxSolver.setOptimizationDirection(OptimizationDirection::Minimize);
std::vector<ValueType> resultForMax = MaxSolver.computeLongRunAverageRewards(env, rewardModel);
for (size_t i = 0; i < xNewL().size(); i++)
{
xNewL()[i] = std::max(xOldL()[i], resultForMax[i]);
}
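// Symmetric step for the upper bound: with the minimizer's choices fixed, the maximizer's best response over-approximates the game value.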
storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> MinSolver(restrictedMinMatrix);
MinSolver.setOptimizationDirection(OptimizationDirection::Maximize);
std::vector<ValueType> resultForMin = MinSolver.computeLongRunAverageRewards(env, rewardModel);
for (size_t i = 0; i < xNewU().size(); i++)
{
xNewU()[i] = std::min(xOldU()[i], resultForMin[i]);
}
STORM_LOG_DEBUG("xL " << xNewL());
STORM_LOG_DEBUG("xU " << xNewU());
} while (!checkConvergence(precision));
result = xNewU();
}
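// Builds a row bit vector that, for the states of the player whose strategy is being fixed, keeps only the row of the chosen action; all rows of the other player's states remain enabled.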
template <typename ValueType>
storm::storage::BitVector SparseSmgLraHelper<ValueType>::getStrategyFixedBitVec(std::vector<uint64_t> const& choices, MinMaxStrategy strategy) {
storm::storage::BitVector restrictBy(_transitionMatrix.getRowCount(), true);
auto rowGroupIndices = this->_transitionMatrix.getRowGroupIndices();
STORM_LOG_DEBUG("choices " << choices);
for (uint64_t state = 0; state < rowGroupIndices.size() - 1; state++) {
// Skip the states of the player whose strategy is not being fixed; all of their rows stay enabled.
if ((_minimizerStates[state] && strategy == MinMaxStrategy::MaxStrategy) || (!_minimizerStates[state] && strategy == MinMaxStrategy::MinStrategy))
continue;
uint64_t rowGroupSize = rowGroupIndices[state + 1] - rowGroupIndices[state];
for (uint64_t rowGroupIndex = 0; rowGroupIndex < rowGroupSize; rowGroupIndex++) {
if (rowGroupIndex != choices[state]) {
restrictBy.set(rowGroupIndex + rowGroupIndices[state], false);
}
}
}
return restrictBy;
}
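// Creates the multiplier for the transition matrix and initializes the iteration vectors: lower bounds start at zero, upper bounds at infinity.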
template <typename ValueType>
void SparseSmgLraHelper<ValueType>::prepareMultiplier(const Environment& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel)
{
_multiplier = storm::solver::MultiplierFactory<ValueType>().create(env, _transitionMatrix);
_minimizerStates = _optimizationDirection == OptimizationDirection::Maximize ? _statesOfCoalition : ~_statesOfCoalition;
_x1L = std::vector<ValueType>(_transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());
_x2L = _x1L;
_x1 = _x1L;
_x2 = _x1;
// Use storm::utility::infinity instead of std::numeric_limits, which is not specialized for exact (rational) value types.
_x1U = std::vector<ValueType>(_transitionMatrix.getRowGroupCount(), storm::utility::infinity<ValueType>());
_x2U = _x1U;
}
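// The bounds are accepted once the per-state gaps between upper and lower bound differ from each other by at most the given threshold (span of the gap vector).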
template <typename ValueType>
bool SparseSmgLraHelper<ValueType>::checkConvergence(ValueType threshold) const {
STORM_LOG_ASSERT(_multiplier, "tried to check for convergence without doing an iteration first.");
// Now check whether the currently produced results are precise enough
STORM_LOG_ASSERT(threshold > storm::utility::zero<ValueType>(), "Did not expect a non-positive threshold.");
auto x1It = xNewL().begin();
auto x1Ite = xNewL().end();
auto x2It = xNewU().begin();
ValueType maxDiff = ((*x2It) - (*x1It));
ValueType minDiff = maxDiff;
// The difference between maxDiff and minDiff is zero at this point. Thus, it doesn't make sense to check the threshold now.
for (++x1It, ++x2It; x1It != x1Ite; ++x1It, ++x2It) {
ValueType diff = (*x2It - *x1It);
// Potentially update maxDiff or minDiff
bool skipCheck = false;
if (maxDiff < diff) {
maxDiff = diff;
} else if (minDiff > diff) {
minDiff = diff;
} else {
skipCheck = true;
}
// Check convergence
if (!skipCheck && (maxDiff - minDiff) > threshold) {
return false;
}
}
return true;
}
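// Two buffers are kept for every iteration vector; _x1IsCurrent selects which buffer holds the latest values. The xNew*/xOld* accessors below resolve to the current and previous iteration, respectively.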
template <typename ValueType>
std::vector<ValueType>& SparseSmgLraHelper<ValueType>::xNewL() {
return _x1IsCurrent ? _x1L : _x2L;
}
template <typename ValueType>
std::vector<ValueType> const& SparseSmgLraHelper<ValueType>::xNewL() const {
return _x1IsCurrent ? _x1L : _x2L;
}
template <typename ValueType>
std::vector<ValueType>& SparseSmgLraHelper<ValueType>::xOldL() {
return _x1IsCurrent ? _x2L : _x1L;
}
template <typename ValueType>
std::vector<ValueType> const& SparseSmgLraHelper<ValueType>::xOldL() const {
return _x1IsCurrent ? _x2L : _x1L;
}
template <typename ValueType>
std::vector<ValueType>& SparseSmgLraHelper<ValueType>::xNewU() {
return _x1IsCurrent ? _x1U : _x2U;
}
template <typename ValueType>
std::vector<ValueType> const& SparseSmgLraHelper<ValueType>::xNewU() const {
return _x1IsCurrent ? _x1U : _x2U;
}
template <typename ValueType>
std::vector<ValueType>& SparseSmgLraHelper<ValueType>::xOldU() {
return _x1IsCurrent ? _x2U : _x1U;
}
template <typename ValueType>
std::vector<ValueType> const& SparseSmgLraHelper<ValueType>::xOldU() const {
return _x1IsCurrent ? _x2U : _x1U;
}
template <typename ValueType>
std::vector<ValueType>& SparseSmgLraHelper<ValueType>::xOld() {
return _x1IsCurrent ? _x2 : _x1;
}
template <typename ValueType>
std::vector<ValueType> const& SparseSmgLraHelper<ValueType>::xOld() const {
return _x1IsCurrent ? _x2 : _x1;
}
template <typename ValueType>
std::vector<ValueType>& SparseSmgLraHelper<ValueType>::xNew() {
return _x1IsCurrent ? _x1 : _x2;
}
template <typename ValueType>
std::vector<ValueType> const& SparseSmgLraHelper<ValueType>::xNew() const {
return _x1IsCurrent ? _x1 : _x2;
}
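// Setters that receive the information extracted from the check task (see setInformationFromCheckTaskNondeterministic at the call site).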
template <typename ValueType>
void SparseSmgLraHelper<ValueType>::setRelevantStates(storm::storage::BitVector relevantStates){
_relevantStates = relevantStates;
}
template <typename ValueType>
void SparseSmgLraHelper<ValueType>::setValueThreshold(storm::logic::ComparisonType const& comparisonType, ValueType const& thresholdValue) {
_valueThreshold = std::make_pair(comparisonType, thresholdValue);
}
template <typename ValueType>
void SparseSmgLraHelper<ValueType>::setOptimizationDirection(storm::solver::OptimizationDirection const& direction) {
_optimizationDirection = direction;
}
template <typename ValueType>
void SparseSmgLraHelper<ValueType>::setProduceScheduler(bool value) {
_produceScheduler = value;
}
template <typename ValueType>
void SparseSmgLraHelper<ValueType>::setProduceChoiceValues(bool value) {
_produceChoiceValues = value;
}
template <typename ValueType>
void SparseSmgLraHelper<ValueType>::setQualitative(bool value) {
_isQualitativeSet = value;
}
template class SparseSmgLraHelper<double>;
#ifdef STORM_HAVE_CARL
template class SparseSmgLraHelper<storm::RationalNumber>;
#endif
}
}
}
}

src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.h (+154)

@@ -0,0 +1,154 @@
#pragma once
#include "storm/storage/SparseMatrix.h"
#include "storm/storage/BitVector.h"
#include "storm/solver/LinearEquationSolver.h"
#include "storm/solver/MinMaxLinearEquationSolver.h"
#include "storm/solver/Multiplier.h"
#include "storm/logic/ComparisonType.h"
namespace storm {
class Environment;
namespace modelchecker {
namespace helper {
namespace internal {
enum class MinMaxStrategy {
MaxStrategy,
MinStrategy
};
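/*!
* Helper class for computing sound long-run average (LRA) reward values of stochastic multiplayer games (SMGs).
* The value iteration maintains a lower and an upper bound per state and stops once the bounds are close enough.
*/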
template <typename ValueType>
class SparseSmgLraHelper {
public:
/// Function mapping from indices to values
typedef std::function<ValueType(uint64_t)> ValueGetter;
SparseSmgLraHelper(storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::BitVector const statesOfCoalition);
/*!
* Performs the sound value iteration with the given state- and action values.
* @param env The environment, containing information on the precision of this computation.
* @param rewardModel the reward model to evaluate.
* @param stateValueGetter vector that holds for each state index (w.r.t. the input transition matrix) the reward for staying in that state.
* @param actionValueGetter function that returns for each global choice index (w.r.t. the input transition matrix) the reward for taking that choice.
* @param result vector that the (optimal) long run average values are written to.
* @param choices if not nullptr, the optimal choices will be inserted in this vector. The vector's size must then be equal to the number of row groups of the input transition matrix.
* @param choiceValues if not nullptr, the values of the individual choices will be inserted in this vector.
* @note it is possible to call this method multiple times with different values. However, other changes to the environment or the optimization direction might not have the expected effect due to caching.
*/
void performValueIteration(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel, std::vector<ValueType> const& stateValueGetter, ValueGetter const& actionValueGetter, std::vector<ValueType>& result, std::vector<uint64_t>* choices = nullptr, std::vector<ValueType>* choiceValues = nullptr);
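/// Creates the multiplier for the transition matrix and initializes the lower and upper bound vectors.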
void prepareMultiplier(const Environment& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel);
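/// Computes the long-run average rewards while maintaining sound lower and upper bounds; returns one value per state.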
std::vector<ValueType> computeLongRunAverageRewardsSound(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel);
void setRelevantStates(storm::storage::BitVector relevantStates);
void setValueThreshold(storm::logic::ComparisonType const& comparisonType, ValueType const& thresholdValue);
void setOptimizationDirection(storm::solver::OptimizationDirection const& direction);
void setProduceScheduler(bool value);
void setProduceChoiceValues(bool value);
void setQualitative(bool value);
private:
/*!
* Initializes the value iterations with the provided values.
* Resets all information from potential previous calls.
* Must be called before the first call to performIterationStep.
* @param stateValueGetter Function that returns for each state index (w.r.t. the input transitions) the value (e.g. reward) for that state
* @param actionValueGetter Function that returns for each global choice index (w.r.t. the input transitions) the value (e.g. reward) for that choice
*/
void initializeNewValues(ValueGetter const& stateValueGetter, ValueGetter const& actionValueGetter, std::vector<ValueType> const* exitRates = nullptr);
bool checkConvergence(ValueType threshold) const;
/*!
* Performs a single iteration step.
* @param env The environment.
* @param dir The optimization direction. Has to be given if there is nondeterminism (otherwise it will be ignored)
* @param choices If given, the optimal choices will be inserted at the appropriate states.
* Note that these choices will be inserted w.r.t. the original model states/choices, i.e. the size of the vector should match the state-count of the input model
* @pre when calling this the first time, initializeNewValues must have been called before. Moreover, prepareNextIteration must be called between two calls of this.
*/
void performIterationStep(Environment const& env, storm::solver::OptimizationDirection const* dir = nullptr, std::vector<uint64_t>* choices = nullptr, std::vector<ValueType>* choiceValues = nullptr);
struct ConvergenceCheckResult {
bool isPrecisionAchieved;
ValueType currentValue;
};
storm::storage::BitVector getStrategyFixedBitVec(std::vector<uint64_t> const& choices, MinMaxStrategy strategy);
/*!
* Must be called between two calls of performIterationStep.
*/
void prepareNextIteration(Environment const& env);
/// Prepares the necessary solvers and multipliers for doing the iterations.
void prepareSolversAndMultipliers(Environment const& env, storm::solver::OptimizationDirection const* dir = nullptr);
void setInputModelChoices(std::vector<uint64_t>& choices, std::vector<uint64_t> const& localMecChoices, bool setChoiceZeroToMarkovianStates = false, bool setChoiceZeroToProbabilisticStates = false) const;
void setInputModelChoiceValues(std::vector<ValueType>& choiceValues, std::vector<ValueType> const& localMecChoiceValues) const;
/// Returns true iff the given state is a timed state
bool isTimedState(uint64_t const& inputModelStateIndex) const;
std::vector<ValueType>& xNew();
std::vector<ValueType> const& xNew() const;
std::vector<ValueType>& xOld();
std::vector<ValueType> const& xOld() const;
std::vector<ValueType>& xNewL();
std::vector<ValueType> const& xNewL() const;
std::vector<ValueType>& xOldL();
std::vector<ValueType> const& xOldL() const;
std::vector<ValueType>& xNewU();
std::vector<ValueType> const& xNewU() const;
std::vector<ValueType>& xOldU();
std::vector<ValueType> const& xOldU() const;
storm::storage::SparseMatrix<ValueType> const& _transitionMatrix;
storm::storage::BitVector const _statesOfCoalition;
ValueType _strategyVIPrecision;
storm::storage::BitVector _relevantStates;
storm::storage::BitVector _minimizerStates;
boost::optional<std::pair<storm::logic::ComparisonType, ValueType>> _valueThreshold;
storm::solver::OptimizationDirection _optimizationDirection;
bool _produceScheduler;
bool _produceChoiceValues;
bool _isQualitativeSet;
ValueType _uniformizationRate;
std::vector<ValueType> _x1, _x2, _x1L, _x2L, _x1U, _x2U;
std::vector<ValueType> _Tsx1, _Tsx2, _TsChoiceValues;
bool _x1IsCurrent;
bool _x1IsCurrentStrategyVI;
std::vector<ValueType> _Isx, _Isb, _IsChoiceValues;
std::unique_ptr<storm::solver::Multiplier<ValueType>> _multiplier;
std::unique_ptr<storm::solver::MinMaxLinearEquationSolver<ValueType>> _Solver;
std::unique_ptr<storm::solver::LinearEquationSolver<ValueType>> _DetIsSolver;
std::unique_ptr<storm::Environment> _IsSolverEnv;
};
}
}
}
}
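A minimal usage sketch (not part of this commit) of how the new helper might be driven; it assumes an SMG's transition matrix, coalition state set, reward model, and solver environment are already available, and it mirrors the call site in SparseSmgRpatlModelChecker.cpp shown below. The wrapper function computeSmgLraSketch is hypothetical.

#include "storm/environment/Environment.h"
#include "storm/models/sparse/StandardRewardModel.h"
#include "storm/solver/OptimizationDirection.h"
#include "storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.h"

template<typename ValueType>
std::vector<ValueType> computeSmgLraSketch(storm::Environment const& env,
                                           storm::storage::SparseMatrix<ValueType> const& transitionMatrix,
                                           storm::storage::BitVector const& statesOfCoalition,
                                           storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel) {
    // Construct the helper on the game's transition matrix and the coalition's states.
    storm::modelchecker::helper::internal::SparseSmgLraHelper<ValueType> helper(transitionMatrix, statesOfCoalition);
    // The coalition maximizes its long-run average reward; the direction must be set before computing values.
    helper.setOptimizationDirection(storm::solver::OptimizationDirection::Maximize);
    // Runs the sound value iteration and returns one LRA value per state.
    return helper.computeLongRunAverageRewardsSound(env, rewardModel);
}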

src/storm/modelchecker/rpatl/SparseSmgRpatlModelChecker.cpp (8 changes)

@@ -14,6 +14,8 @@
 #include "storm/modelchecker/rpatl/helper/SparseSmgRpatlHelper.h"
 #include "storm/modelchecker/helper/infinitehorizon/SparseNondeterministicGameInfiniteHorizonHelper.h"
+#include "storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.h"
+#include "storm/modelchecker/helper/utility/SetInformationFromCheckTask.h"
 #include "storm/logic/FragmentSpecification.h"
@@ -240,11 +242,12 @@ namespace storm {
 template<typename SparseSmgModelType>
 std::unique_ptr<CheckResult> SparseSmgRpatlModelChecker<SparseSmgModelType>::computeLongRunAverageRewards(Environment const& env, storm::logic::RewardMeasureType rewardMeasureType, CheckTask<storm::logic::LongRunAverageRewardFormula, ValueType> const& checkTask) {
 auto rewardModel = storm::utility::createFilteredRewardModel(this->getModel(), checkTask);
-storm::modelchecker::helper::SparseNondeterministicGameInfiniteHorizonHelper<ValueType> helper(this->getModel().getTransitionMatrix(), statesOfCoalition);
+storm::modelchecker::helper::internal::SparseSmgLraHelper<ValueType> helper(this->getModel().getTransitionMatrix(), statesOfCoalition);
 storm::modelchecker::helper::setInformationFromCheckTaskNondeterministic(helper, checkTask, this->getModel());
-auto values = helper.computeLongRunAverageRewards(env, rewardModel.get());
+auto values = helper.computeLongRunAverageRewardsSound(env, rewardModel.get());
 std::unique_ptr<CheckResult> result(new ExplicitQuantitativeCheckResult<ValueType>(std::move(values)));
+/*
 if(checkTask.isShieldingTask()) {
 storm::storage::BitVector allStatesBv = storm::storage::BitVector(this->getModel().getTransitionMatrix().getRowGroupCount(), true);
 auto shield = tempest::shields::createQuantitativeShield<ValueType>(std::make_shared<storm::models::sparse::Smg<ValueType>>(this->getModel()), helper.getChoiceValues(), checkTask.getShieldingExpression(), checkTask.getOptimizationDirection(), allStatesBv, statesOfCoalition);
@@ -253,6 +256,7 @@ namespace storm {
 if (checkTask.isProduceSchedulersSet()) {
 result->asExplicitQuantitativeCheckResult<ValueType>().setScheduler(std::make_unique<storm::storage::Scheduler<ValueType>>(helper.extractScheduler()));
 }
+*/
 return result;
 }
