
Game LRA Sound algorithm fully debugged

main
Fabian Russold 4 months ago
committed by sp
commit 8846a976f2
  1. src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.cpp (129 changed lines)
  2. src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.h (61 changed lines)
  3. src/storm/modelchecker/rpatl/SparseSmgRpatlModelChecker.cpp (4 changed lines)
  4. src/storm/modelchecker/rpatl/helper/internal/SoundGameViHelper.cpp (7 changed lines)

src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.cpp (129 changed lines)

@@ -23,13 +23,12 @@ namespace storm {
namespace internal {
template <typename ValueType>
SparseSmgLraHelper<ValueType>::SparseSmgLraHelper(storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::BitVector const statesOfCoalition) : _transitionMatrix(transitionMatrix), _x1IsCurrent(false), _x1IsCurrentStrategyVI(false), _statesOfCoalition(statesOfCoalition) {
SparseSmgLraHelper<ValueType>::SparseSmgLraHelper(storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::BitVector const statesOfCoalition) : _transitionMatrix(transitionMatrix), _statesOfCoalition(statesOfCoalition) {
}
template <typename ValueType>
std::vector<ValueType> SparseSmgLraHelper<ValueType>::computeLongRunAverageRewardsSound(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel) {
// STORM_LOG_DEBUG("Transition Matrix:\n" << _transitionMatrix);
std::vector<ValueType> result;
std::vector<ValueType> stateRewardsGetter = std::vector<ValueType>(_transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());
if (rewardModel.hasStateRewards()) {
@@ -45,7 +44,7 @@ namespace storm {
} else {
actionRewardsGetter = [] (uint64_t) { return storm::utility::zero<ValueType>(); };
}
std::vector<ValueType> b = getBVector(stateRewardsGetter, actionRewardsGetter);
_b = getBVector(stateRewardsGetter, actionRewardsGetter);
// If requested, allocate memory for the choices made
if (this->_produceScheduler) {
@@ -56,7 +55,7 @@ namespace storm {
}
prepareMultiplier(env, rewardModel);
performValueIteration(env, rewardModel, b, actionRewardsGetter, result);
performValueIteration(env, rewardModel, _b, actionRewardsGetter, result);
return result;
}
@@ -83,8 +82,7 @@ namespace storm {
auto precision = storm::utility::convertNumber<ValueType>(env.solver().lra().getPrecision());
Environment envMinMax = env;
envMinMax.solver().lra().setPrecision(precision / 2.0);
STORM_LOG_DEBUG(envMinMax.solver().lra().getPrecision());
envMinMax.solver().lra().setPrecision(precision / storm::utility::convertNumber<storm::RationalNumber>(2));
do
{
size_t iteration_count = 0;
@@ -92,23 +90,28 @@ namespace storm {
_multiplier->multiplyAndReduce(env, _optimizationDirection, xNew(), &b, xNew(), &choicesForStrategies, &_statesOfCoalition);
if (iteration_count % 5 == 0) { // only every 5th iteration
if (iteration_count % 50 == 0) { // only every 50th iteration
storm::storage::BitVector fixedMaxStrat = getStrategyFixedBitVec(choicesForStrategies, MinMaxStrategy::MaxStrategy);
storm::storage::BitVector fixedMinStrat = getStrategyFixedBitVec(choicesForStrategies, MinMaxStrategy::MinStrategy);
// compute bounds
if (fixedMaxStrat != _fixedMaxStrat) {
storm::storage::SparseMatrix<ValueType> restrictedMaxMatrix = _transitionMatrix.restrictRows(fixedMaxStrat);
STORM_LOG_DEBUG("xL " << xNewL()[0]);
storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> MaxSolver(restrictedMaxMatrix);
STORM_LOG_DEBUG("xL " << xNewL()[0]);
MaxSolver.setOptimizationDirection(OptimizationDirection::Minimize);
MaxSolver.setProduceChoiceValues(false);
_resultForMax = MaxSolver.computeLongRunAverageRewards(envMinMax, rewardModel);
STORM_LOG_DEBUG("resultMax: " << _resultForMax);
_fixedMaxStrat = fixedMaxStrat;
STORM_LOG_DEBUG("xL " << xNewL()[0]);
for (size_t i = 0; i < xNewL().size(); i++) {
xNewL()[i] = std::max(xNewL()[i], _resultForMax[i]);
}
STORM_LOG_DEBUG("xL " << xNewL()[0]);
}
if (fixedMinStrat != _fixedMinStrat) {
@@ -116,19 +119,17 @@ namespace storm {
storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> MinSolver(restrictedMinMatrix);
MinSolver.setOptimizationDirection(OptimizationDirection::Maximize);
MinSolver.setProduceChoiceValues(false);
_resultForMin = MinSolver.computeLongRunAverageRewards(envMinMax, rewardModel);
STORM_LOG_DEBUG("resultMin: " << _resultForMin);
_fixedMinStrat = fixedMinStrat;
for (size_t i = 0; i < xNewU().size(); i++) {
xNewU()[i] = std::min(xNewU()[i], _resultForMin[i]);
}
STORM_LOG_DEBUG("xU " << xNewU()[0]);
}
}
STORM_LOG_DEBUG("xL " << xNewL());
STORM_LOG_DEBUG("xU " << xNewU());
} while (!checkConvergence(precision));
if (_produceScheduler) {
@@ -140,7 +141,7 @@ namespace storm {
this->_choiceValues.emplace();
}
this->_choiceValues->resize(this->_transitionMatrix.getRowCount());
_choiceValues = calcChoiceValues(envMinMax, rewardModel);
_choiceValues = calcChoiceValues(env, rewardModel);
}
result = xNewL();
}
@@ -150,7 +151,6 @@ namespace storm {
storm::storage::BitVector SparseSmgLraHelper<ValueType>::getStrategyFixedBitVec(std::vector<uint64_t> const& choices, MinMaxStrategy strategy) {
storm::storage::BitVector restrictBy(_transitionMatrix.getRowCount(), true);
auto rowGroupIndices = this->_transitionMatrix.getRowGroupIndices();
STORM_LOG_DEBUG("choices " << choices);
for(uint state = 0; state < _transitionMatrix.getRowGroupCount(); state++) {
if ((_minimizerStates[state] && strategy == MinMaxStrategy::MaxStrategy) || (!_minimizerStates[state] && strategy == MinMaxStrategy::MinStrategy))
@@ -169,46 +169,8 @@ namespace storm {
template <typename ValueType>
std::vector<ValueType> SparseSmgLraHelper<ValueType>::calcChoiceValues(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel) {
std::vector<ValueType> choiceValues(_transitionMatrix.getRowCount());
_multiplier->multiply(env, xNewL(), nullptr, choiceValues);
storm::storage::SparseMatrix<ValueType> restrictedMaxMatrix = _transitionMatrix.restrictRows(_fixedMaxStrat);
storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> MaxSolver(restrictedMaxMatrix);
MaxSolver.setOptimizationDirection(OptimizationDirection::Minimize);
MaxSolver.setProduceChoiceValues(true);
MaxSolver.computeLongRunAverageRewards(env, rewardModel);
std::vector<ValueType> minimizerChoices = MaxSolver.getChoiceValues();
storm::storage::SparseMatrix<ValueType> restrictedMinMatrix = _transitionMatrix.restrictRows(_fixedMinStrat);
storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> MinSolver(restrictedMinMatrix);
MinSolver.setOptimizationDirection(OptimizationDirection::Maximize);
MinSolver.setProduceChoiceValues(true);
MinSolver.computeLongRunAverageRewards(env, rewardModel);
std::vector<ValueType> maximizerChoices = MinSolver.getChoiceValues();
auto rowGroupIndices = this->_transitionMatrix.getRowGroupIndices();
auto minIt = minimizerChoices.begin();
auto maxIt = maximizerChoices.begin();
size_t globalCounter = 0;
for(uint state = 0; state < _transitionMatrix.getRowGroupCount(); state++) {
uint rowGroupSize = rowGroupIndices[state + 1] - rowGroupIndices[state];
for(uint rowGroupIndex = 0; rowGroupIndex < rowGroupSize; rowGroupIndex++) {
if (_minimizerStates[state]) {
choiceValues[globalCounter] = *minIt;
minIt++;
}
else {
choiceValues[globalCounter] = *maxIt;
maxIt++;
}
globalCounter++;
}
if (_minimizerStates[state]) {
maxIt++;
}
else {
minIt++;
}
}
return choiceValues;
}
@@ -223,9 +185,11 @@ namespace storm {
storm::storage::Scheduler<ValueType> SparseSmgLraHelper<ValueType>::extractScheduler() const{
auto const& optimalChoices = getProducedOptimalChoices();
storm::storage::Scheduler<ValueType> scheduler(optimalChoices.size());
for (uint64_t state = 0; state < optimalChoices.size(); ++state) {
scheduler.setChoice(optimalChoices[state], state);
}
return scheduler;
}
@@ -233,6 +197,7 @@ namespace storm {
std::vector<uint64_t> const& SparseSmgLraHelper<ValueType>::getProducedOptimalChoices() const {
STORM_LOG_ASSERT(_produceScheduler, "Trying to get the produced optimal choices although no scheduler was requested.");
STORM_LOG_ASSERT(this->_producedOptimalChoices.is_initialized(), "Trying to get the produced optimal choices but none were available. Was there a computation call before?");
return this->_producedOptimalChoices.get();
}
@@ -240,12 +205,15 @@ namespace storm {
void SparseSmgLraHelper<ValueType>::prepareMultiplier(const Environment& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel)
{
_multiplier = storm::solver::MultiplierFactory<ValueType>().create(env, _transitionMatrix);
_minimizerStates = _optimizationDirection == OptimizationDirection::Maximize ? _statesOfCoalition : ~_statesOfCoalition;
if (_statesOfCoalition.size()) {
_minimizerStates = _optimizationDirection == OptimizationDirection::Maximize ? _statesOfCoalition : ~_statesOfCoalition;
}
else {
_minimizerStates = storm::storage::BitVector(_transitionMatrix.getRowGroupCount(), _optimizationDirection == OptimizationDirection::Minimize);
}
_x1L = std::vector<ValueType>(_transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());
_x2L = _x1L;
_x1 = _x1L;
_x2 = _x1;
_xL = std::vector<ValueType>(_transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());
_x = _xL;
_fixedMaxStrat = storm::storage::BitVector(_transitionMatrix.getRowCount(), false);
_fixedMinStrat = storm::storage::BitVector(_transitionMatrix.getRowCount(), false);
@@ -253,8 +221,7 @@ namespace storm {
_resultForMin = std::vector<ValueType>(_transitionMatrix.getRowGroupCount());
_resultForMax = std::vector<ValueType>(_transitionMatrix.getRowGroupCount());
_x1U = std::vector<ValueType>(_transitionMatrix.getRowGroupCount(), std::numeric_limits<ValueType>::infinity());
_x2U = _x1U;
_xU = std::vector<ValueType>(_transitionMatrix.getRowGroupCount(), std::numeric_limits<ValueType>::infinity());
}
template <typename ValueType>
@@ -276,62 +243,32 @@ namespace storm {
template <typename ValueType>
std::vector<ValueType>& SparseSmgLraHelper<ValueType>::xNewL() {
return _x1IsCurrent ? _x1L : _x2L;
return _xL;
}
template <typename ValueType>
std::vector<ValueType> const& SparseSmgLraHelper<ValueType>::xNewL() const {
return _x1IsCurrent ? _x1L : _x2L;
}
template <typename ValueType>
std::vector<ValueType>& SparseSmgLraHelper<ValueType>::xOldL() {
return _x1IsCurrent ? _x2L : _x1L;
}
template <typename ValueType>
std::vector<ValueType> const& SparseSmgLraHelper<ValueType>::xOldL() const {
return _x1IsCurrent ? _x2L : _x1L;
return _xL;
}
template <typename ValueType>
std::vector<ValueType>& SparseSmgLraHelper<ValueType>::xNewU() {
return _x1IsCurrent ? _x1U : _x2U;
return _xU;
}
template <typename ValueType>
std::vector<ValueType> const& SparseSmgLraHelper<ValueType>::xNewU() const {
return _x1IsCurrent ? _x1U : _x2U;
}
template <typename ValueType>
std::vector<ValueType>& SparseSmgLraHelper<ValueType>::xOldU() {
return _x1IsCurrent ? _x2U : _x1U;
}
template <typename ValueType>
std::vector<ValueType> const& SparseSmgLraHelper<ValueType>::xOldU() const {
return _x1IsCurrent ? _x2U : _x1U;
}
template <typename ValueType>
std::vector<ValueType>& SparseSmgLraHelper<ValueType>::xOld() {
return _x1IsCurrent ? _x2 : _x1;
}
template <typename ValueType>
std::vector<ValueType> const& SparseSmgLraHelper<ValueType>::xOld() const {
return _x1IsCurrent ? _x2 : _x1;
return _xU;
}
template <typename ValueType>
std::vector<ValueType>& SparseSmgLraHelper<ValueType>::xNew() {
return _x1IsCurrent ? _x1 : _x2;
return _x;
}
template <typename ValueType>
std::vector<ValueType> const& SparseSmgLraHelper<ValueType>::xNew() const {
return _x1IsCurrent ? _x1 : _x2;
return _x;
}

src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.h (61 changed lines)

@@ -29,17 +29,6 @@ namespace storm {
SparseSmgLraHelper(storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::BitVector const statesOfCoalition);
/*!
* Performs value iteration with the given state- and action values.
* @param env The environment, containing information on the precision of this computation.
* @param stateValueGetter function that returns for each state index (w.r.t. the input transition matrix) the reward for staying in state. Will only be called for timed states.
* @param actionValueGetter function that returns for each global choice index (w.r.t. the input transition matrix) the reward for taking that choice
* @param exitRates (as in the constructor)
* @param dir Optimization direction. Must be not nullptr in case of nondeterminism
* @param choices if not nullptr, the optimal choices will be inserted in this vector. The vector's size must then be equal to the number of row groups of the input transition matrix.
* @return The (optimal) long run average value of the specified component.
* @note it is possible to call this method multiple times with different values. However, other changes to the environment or the optimization direction might not have the expected effect due to caching.
*/
void performValueIteration(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel, std::vector<ValueType> const& stateValueGetter, ValueGetter const& actionValueGetter, std::vector<ValueType>& result, std::vector<uint64_t>* choices = nullptr, std::vector<ValueType>* choiceValues = nullptr);
std::vector<ValueType> getChoiceValues() const;
@@ -66,26 +55,8 @@ namespace storm {
private:
/*!
* Initializes the value iterations with the provided values.
* Resets all information from potential previous calls.
* Must be called before the first call to performIterationStep.
* @param stateValueGetter Function that returns for each state index (w.r.t. the input transitions) the value (e.g. reward) for that state
* @param stateValueGetter Function that returns for each global choice index (w.r.t. the input transitions) the value (e.g. reward) for that choice
*/
void initializeNewValues(ValueGetter const& stateValueGetter, ValueGetter const& actionValueGetter, std::vector<ValueType> const* exitRates = nullptr);
bool checkConvergence(ValueType threshold) const;
/*!
* Performs a single iteration step.
* @param env The environment.
* @param dir The optimization direction. Has to be given if there is nondeterminism (otherwise it will be ignored)
* @param choices If given, the optimal choices will be inserted at the appropriate states.
* Note that these choices will be inserted w.r.t. the original model states/choices, i.e. the size of the vector should match the state-count of the input model
* @pre when calling this the first time, initializeNewValues must have been called before. Moreover, prepareNextIteration must be called between two calls of this.
*/
void performIterationStep(Environment const& env, storm::solver::OptimizationDirection const* dir = nullptr, std::vector<uint64_t>* choices = nullptr, std::vector<ValueType>* choiceValues = nullptr);
struct ConvergenceCheckResult {
@@ -99,42 +70,17 @@ namespace storm {
std::vector<ValueType> calcChoiceValues(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel);
/*!
* Must be called between two calls of performIterationStep.
*/
void prepareNextIteration(Environment const& env);
/// Prepares the necessary solvers and multipliers for doing the iterations.
void prepareSolversAndMultipliers(Environment const& env, storm::solver::OptimizationDirection const* dir = nullptr);
void setInputModelChoices(std::vector<uint64_t>& choices, std::vector<uint64_t> const& localMecChoices, bool setChoiceZeroToMarkovianStates = false, bool setChoiceZeroToProbabilisticStates = false) const;
void setInputModelChoiceValues(std::vector<ValueType>& choiceValues, std::vector<ValueType> const& localMecChoiceValues) const;
/// Returns true iff the given state is a timed state
bool isTimedState(uint64_t const& inputModelStateIndex) const;
std::vector<ValueType>& xNew();
std::vector<ValueType> const& xNew() const;
std::vector<ValueType>& xOld();
std::vector<ValueType> const& xOld() const;
std::vector<ValueType>& xNewL();
std::vector<ValueType> const& xNewL() const;
std::vector<ValueType>& xOldL();
std::vector<ValueType> const& xOldL() const;
std::vector<ValueType>& xNewU();
std::vector<ValueType> const& xNewU() const;
std::vector<ValueType>& xOldU();
std::vector<ValueType> const& xOldU() const;
storm::storage::SparseMatrix<ValueType> const& _transitionMatrix;
storm::storage::BitVector const _statesOfCoalition;
ValueType _strategyVIPrecision;
storm::storage::BitVector _relevantStates;
storm::storage::BitVector _minimizerStates;
@ -144,17 +90,16 @@ namespace storm {
std::vector<ValueType> _resultForMax;
std::vector<ValueType> _resultForMin;
std::vector<ValueType> _b;
boost::optional<std::pair<storm::logic::ComparisonType, ValueType>> _valueThreshold;
storm::solver::OptimizationDirection _optimizationDirection;
bool _produceScheduler;
bool _produceChoiceValues;
bool _isQualitativeSet;
ValueType _uniformizationRate;
std::vector<ValueType> _x1, _x2, _x1L, _x2L, _x1U, _x2U;
std::vector<ValueType> _x, _xL, _xU;
std::vector<ValueType> _Tsx1, _Tsx2, _TsChoiceValues;
bool _x1IsCurrent;
bool _x1IsCurrentStrategyVI;
std::vector<ValueType> _Isx, _Isb, _IsChoiceValues;
std::unique_ptr<storm::solver::Multiplier<ValueType>> _multiplier;
std::unique_ptr<storm::solver::MinMaxLinearEquationSolver<ValueType>> _Solver;
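With single lower and upper bound vectors (_xL, _xU) kept in this header, checkConvergence only has to compare the two bounds component-wise against the requested precision. A minimal sketch of such a check, assuming a plain absolute criterion (the criterion actually used in the repository may differ, for instance by also allowing a relative comparison):

#include <cstddef>
#include <vector>

// Sound stopping criterion: every upper-bound entry is within `threshold`
// of the corresponding lower-bound entry.
bool checkConvergence(std::vector<double> const& xL, std::vector<double> const& xU, double threshold) {
    for (std::size_t i = 0; i < xL.size(); ++i) {
        if (xU[i] - xL[i] > threshold) {
            return false;
        }
    }
    return true;
}

Note that the .cpp above solves the strategy-restricted models with half the precision (via envMinMax), presumably so that the error of those sub-computations stays within the budget of this outer check.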

src/storm/modelchecker/rpatl/SparseSmgRpatlModelChecker.cpp (4 changed lines)

@@ -265,8 +265,8 @@ namespace storm {
if (env.solver().isForceSoundness()) {
storm::modelchecker::helper::internal::SparseSmgLraHelper<ValueType> helper(this->getModel().getTransitionMatrix(), statesOfCoalition);
auto values = helper.computeLongRunAverageRewardsSound(env, rewardModel.get());
storm::modelchecker::helper::setInformationFromCheckTaskNondeterministic(helper, checkTask, this->getModel());
auto values = helper.computeLongRunAverageRewardsSound(env, rewardModel.get());
std::unique_ptr<CheckResult> result(new ExplicitQuantitativeCheckResult<ValueType>(std::move(values)));
if(checkTask.isShieldingTask()) {
@@ -281,8 +281,8 @@ namespace storm {
}
storm::modelchecker::helper::SparseNondeterministicGameInfiniteHorizonHelper<ValueType> helper(this->getModel().getTransitionMatrix(), statesOfCoalition);
auto values = helper.computeLongRunAverageRewards(env, rewardModel.get());
storm::modelchecker::helper::setInformationFromCheckTaskNondeterministic(helper, checkTask, this->getModel());
auto values = helper.computeLongRunAverageRewards(env, rewardModel.get());
std::unique_ptr<CheckResult> result(new ExplicitQuantitativeCheckResult<ValueType>(std::move(values)));
if(checkTask.isShieldingTask()) {
storm::storage::BitVector allStatesBv = storm::storage::BitVector(this->getModel().getTransitionMatrix().getRowGroupCount(), true);
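Both branches of the checker now call setInformationFromCheckTaskNondeterministic before the value computation, so settings taken from the check task (such as the optimization direction and whether a scheduler should be produced) are in effect when the helper actually runs rather than being applied afterwards. Schematically, with model, checkTask, env, rewardModel and statesOfCoalition as placeholders for the surrounding checker context:

// Configure the helper from the check task first ...
storm::modelchecker::helper::internal::SparseSmgLraHelper<ValueType> helper(model.getTransitionMatrix(), statesOfCoalition);
storm::modelchecker::helper::setInformationFromCheckTaskNondeterministic(helper, checkTask, model);
// ... and only then compute, so the configured settings are honored.
auto values = helper.computeLongRunAverageRewardsSound(env, rewardModel.get());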

src/storm/modelchecker/rpatl/helper/internal/SoundGameViHelper.cpp (7 changed lines)

@@ -22,7 +22,12 @@ namespace storm {
void SoundGameViHelper<ValueType>::prepareSolversAndMultipliers(const Environment& env) {
_multiplier = storm::solver::MultiplierFactory<ValueType>().create(env, _transitionMatrix);
_x1IsCurrent = false;
_minimizerStates = _optimizationDirection == OptimizationDirection::Maximize ? _statesOfCoalition : ~_statesOfCoalition;
if (_statesOfCoalition.size()) {
_minimizerStates = _optimizationDirection == OptimizationDirection::Maximize ? _statesOfCoalition : ~_statesOfCoalition;
}
else {
_minimizerStates = storm::storage::BitVector(_transitionMatrix.getRowGroupCount(), _optimizationDirection == OptimizationDirection::Minimize);
}
_oldPolicy = storm::storage::BitVector(_transitionMatrix.getRowCount(), false);
_timing = std::vector<double>(5, 0);
}
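Both helpers now guard the same corner case: if no coalition bit vector is supplied (size zero), both _statesOfCoalition and its complement are empty, so _minimizerStates would end up with the wrong size and later per-state lookups could not work; this is presumably what the added branch protects against. The fallback builds a bit vector over all row groups, all bits set when minimizing and all bits clear when maximizing. Restated as a compact sketch, with maximize standing in for the direction comparison used in the diff:

storm::storage::BitVector minimizerStates;
if (statesOfCoalition.size() > 0) {
    // A coalition is given: when maximizing, the coalition bits mark the minimizer states,
    // otherwise their complement does.
    minimizerStates = maximize ? statesOfCoalition : ~statesOfCoalition;
} else {
    // No coalition given: every row group gets the same role, depending on the direction.
    minimizerStates = storm::storage::BitVector(transitionMatrix.getRowGroupCount(), !maximize);
}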
