implemented VI based Long-run-average method for MDPs

9 years ago · 5b10b027fc
2 changed files with 146 additions and 6 deletions
--- a/src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.cpp
+++ b/src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.cpp
@ -1,4 +1,7 @@
- #include "storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h"
+#include "storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h"
+
+#include <boost/container/flat_map.hpp>
+

 #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
 #include "storm/modelchecker/hints/ExplicitModelCheckerHint.h"
@ -18,6 +21,9 @@
 #include "storm/solver/MinMaxLinearEquationSolver.h"
 #include "storm/solver/LpSolver.h"
 
+#include "storm/settings/SettingsManager.h"
+#include "storm/settings/modules/MinMaxEquationSolverSettings.h"
+
 #include "storm/exceptions/InvalidStateException.h"
 #include "storm/exceptions/InvalidPropertyException.h"
 #include "storm/exceptions/InvalidSettingsException.h"
@ -543,7 +549,7 @@ namespace storm {
                for (uint_fast64_t currentMecIndex = 0; currentMecIndex < mecDecomposition.size(); ++currentMecIndex) {
                    storm::storage::MaximalEndComponent const& mec = mecDecomposition[currentMecIndex];
                    
-                    lraValuesForEndComponents[currentMecIndex] = computeLraForMaximalEndComponent(dir, transitionMatrix, rewardModel, mec);
+                    lraValuesForEndComponents[currentMecIndex] = computeLraForMaximalEndComponent(dir, transitionMatrix, rewardModel, mec, minMaxLinearEquationSolverFactory);
                    
                    // Gather information for later use.
                    for (auto const& stateChoicesPair : mec) {
@ -668,7 +674,133 @@ namespace storm {
            
            template<typename ValueType>
            template<typename RewardModelType>
-            ValueType SparseMdpPrctlHelper<ValueType>::computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec) {
+            ValueType SparseMdpPrctlHelper<ValueType>::computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<ValueType> const& minMaxLinearEquationSolverFactory) {
+                
+                // If the mec only consists of a single state, we compute the LRA value directly
+                if (++mec.begin() == mec.end()) {
+                    uint64_t state = mec.begin()->first;
+                    auto choiceIt = mec.begin()->second.begin();
+                    ValueType result = rewardModel.getTotalStateActionReward(state, *choiceIt, transitionMatrix);
+                    for (++choiceIt; choiceIt != mec.begin()->second.end(); ++choiceIt) {
+                        if (storm::solver::minimize(dir)) {
+                            result = std::min(result, rewardModel.getTotalStateActionReward(state, *choiceIt, transitionMatrix));
+                        } else {
+                            result = std::max(result, rewardModel.getTotalStateActionReward(state, *choiceIt, transitionMatrix));
+                        }
+                    }
+                    return result;
+                }
+                
+                // Solve MEC with the method specified in the settings
+                storm::solver::MinMaxMethod method = storm::settings::getModule<storm::settings::modules::MinMaxEquationSolverSettings>().getMinMaxEquationSolvingMethod();
+                if (method == storm::solver::MinMaxMethod::LinearProgramming) {
+                    return computeLraForMaximalEndComponentLP(dir, transitionMatrix, rewardModel, mec);
+                } else if (method == storm::solver::MinMaxMethod::ValueIteration) {
+                    return computeLraForMaximalEndComponentVI(dir, transitionMatrix, rewardModel, mec, minMaxLinearEquationSolverFactory);
+                } else {
+                    STORM_LOG_THROW(false, storm::exceptions::InvalidSettingsException, "Unsupported technique.");
+                }
+            }
+            
+            template<typename ValueType>
+            template<typename RewardModelType>
+            ValueType SparseMdpPrctlHelper<ValueType>::computeLraForMaximalEndComponentVI(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<ValueType> const& minMaxLinearEquationSolverFactory) {
+                
+                // Initialize data about the mec
+                storm::storage::BitVector mecStates(transitionMatrix.getRowGroupCount(), false);
+                storm::storage::BitVector mecChoices(transitionMatrix.getRowCount(), false);
+                for (auto const& stateChoicesPair : mec) {
+                    mecStates.set(stateChoicesPair.first);
+                    for (auto const& choice : stateChoicesPair.second) {
+                        mecChoices.set(choice);
+                    }
+                }
+                
+                boost::container::flat_map<uint64_t, uint64_t> toSubModelStateMapping;
+                uint64_t currState = 0;
+                toSubModelStateMapping.reserve(mecStates.getNumberOfSetBits());
+                for (auto const& mecState : mecStates) {
+                    toSubModelStateMapping.insert(std::pair<uint64_t, uint64_t>(mecState, currState));
+                    ++currState;
+                }
+                
+                // Get a transition matrix that only considers the states and choices within the MEC
+                storm::storage::SparseMatrixBuilder<ValueType> mecTransitionBuilder(mecChoices.getNumberOfSetBits(), mecStates.getNumberOfSetBits(), 0, true, true, mecStates.getNumberOfSetBits());
+                std::vector<ValueType> choiceRewards;
+                choiceRewards.reserve(mecChoices.getNumberOfSetBits());
+                uint64_t currRow = 0;
+                ValueType selfLoopProb = storm::utility::convertNumber<ValueType>(0.1); // todo try other values
+                ValueType scalingFactor = storm::utility::one<ValueType>() - selfLoopProb;
+                for (auto const& mecState : mecStates) {
+                    mecTransitionBuilder.newRowGroup(currRow);
+                    uint64_t groupStart = transitionMatrix.getRowGroupIndices()[mecState];
+                    uint64_t groupEnd = transitionMatrix.getRowGroupIndices()[mecState + 1];
+                    for (uint64_t choice = mecChoices.getNextSetIndex(groupStart); choice < groupEnd; choice = mecChoices.getNextSetIndex(choice + 1)) {
+                        bool insertedDiagElement = false;
+                        for (auto const& entry : transitionMatrix.getRow(choice)) {
+                            uint64_t column = toSubModelStateMapping[entry.getColumn()];
+                            if (!insertedDiagElement && entry.getColumn() > mecState) {
+                                mecTransitionBuilder.addNextValue(currRow, toSubModelStateMapping[mecState], selfLoopProb);
+                                insertedDiagElement = true;
+                            }
+                            if (!insertedDiagElement && entry.getColumn() == mecState) {
+                                mecTransitionBuilder.addNextValue(currRow, column, selfLoopProb + scalingFactor * entry.getValue());
+                                insertedDiagElement = true;
+                            } else {
+                                mecTransitionBuilder.addNextValue(currRow, column,  scalingFactor * entry.getValue());
+                            }
+                        }
+                        if (!insertedDiagElement) {
+                            mecTransitionBuilder.addNextValue(currRow, toSubModelStateMapping[mecState], selfLoopProb);
+                        }
+                        
+                        // Compute the rewards obtained for this choice
+                        choiceRewards.push_back(scalingFactor * rewardModel.getTotalStateActionReward(mecState, choice, transitionMatrix));
+                        
+                        ++currRow;
+                    }
+                }
+                auto mecTransitions = mecTransitionBuilder.build();
+                STORM_LOG_ASSERT(mecTransitions.isProbabilistic(), "The MEC-Matrix is not probabilistic.");
+                
+                // start the iterations
+                ValueType precision = storm::utility::convertNumber<ValueType>(storm::settings::getModule<storm::settings::modules::MinMaxEquationSolverSettings>().getPrecision());
+                std::vector<ValueType> x(mecTransitions.getRowGroupCount(), storm::utility::zero<ValueType>());
+                std::vector<ValueType> xPrime = x;
+                auto solver = minMaxLinearEquationSolverFactory.create(std::move(mecTransitions));
+                solver->setCachingEnabled(true);
+                ValueType maxDiff, minDiff;
+                while (true) {
+                    // Compute the obtained rewards for the next step
+                    solver->repeatedMultiply(dir, x, &choiceRewards, 1);
+                    
+                    // update xPrime and check for convergence
+                    // to avoid large (and numerically unstable) x-values, we substract a reference value.
+                    auto xIt = x.begin();
+                    auto xPrimeIt = xPrime.begin();
+                    ValueType refVal = *xIt;
+                    maxDiff = *xIt - *xPrimeIt;
+                    minDiff = maxDiff;
+                    *xIt -= refVal;
+                    *xPrimeIt = *xIt;
+                    for (++xIt, ++xPrimeIt; xIt != x.end(); ++xIt, ++xPrimeIt) {
+                        ValueType diff = *xIt - *xPrimeIt;
+                        maxDiff = std::max(maxDiff, diff);
+                        minDiff = std::min(minDiff, diff);
+                        *xIt -= refVal;
+                        *xPrimeIt = *xIt;
+                    }
+
+                    if ((maxDiff - minDiff) < precision) {
+                        break;
+                    }
+                }
+                return (maxDiff + minDiff) / (storm::utility::convertNumber<ValueType>(2.0) * scalingFactor);
+            }
+            
+            template<typename ValueType>
+            template<typename RewardModelType>
+            ValueType SparseMdpPrctlHelper<ValueType>::computeLraForMaximalEndComponentLP(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec) {
                std::shared_ptr<storm::solver::LpSolver> solver = storm::utility::solver::getLpSolver("LRA for MEC");
                solver->setOptimizationDirection(invert(dir));
                
@ -817,7 +949,9 @@ namespace storm {
            template MDPSparseModelCheckingHelperReturnType<double> SparseMdpPrctlHelper<double>::computeReachabilityRewards(OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::storage::SparseMatrix<double> const& backwardTransitions, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, storm::solver::MinMaxLinearEquationSolverFactory<double> const& minMaxLinearEquationSolverFactory, ModelCheckerHint const& hint);
            template MDPSparseModelCheckingHelperReturnType<double> SparseMdpPrctlHelper<double>::computeReachabilityRewards(storm::solver::SolveGoal const& goal, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::storage::SparseMatrix<double> const& backwardTransitions, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, storm::solver::MinMaxLinearEquationSolverFactory<double> const& minMaxLinearEquationSolverFactory, ModelCheckerHint const& hint);
            template std::vector<double> SparseMdpPrctlHelper<double>::computeLongRunAverageRewards(OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::storage::SparseMatrix<double> const& backwardTransitions, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::solver::MinMaxLinearEquationSolverFactory<double> const& minMaxLinearEquationSolverFactory);
-            template double SparseMdpPrctlHelper<double>::computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::MaximalEndComponent const& mec);
+            template double SparseMdpPrctlHelper<double>::computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<double> const& minMaxLinearEquationSolverFactory);
+            template double SparseMdpPrctlHelper<double>::computeLraForMaximalEndComponentVI(OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<double> const& minMaxLinearEquationSolverFactory);
+            template double SparseMdpPrctlHelper<double>::computeLraForMaximalEndComponentLP(OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::MaximalEndComponent const& mec);

 #ifdef STORM_HAVE_CARL
            template class SparseMdpPrctlHelper<storm::RationalNumber>;
@ -826,7 +960,9 @@ namespace storm {
            template MDPSparseModelCheckingHelperReturnType<storm::RationalNumber> SparseMdpPrctlHelper<storm::RationalNumber>::computeReachabilityRewards(OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::storage::SparseMatrix<storm::RationalNumber> const& backwardTransitions, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, storm::solver::MinMaxLinearEquationSolverFactory<storm::RationalNumber> const& minMaxLinearEquationSolverFactory, ModelCheckerHint const& hint);
            template MDPSparseModelCheckingHelperReturnType<storm::RationalNumber> SparseMdpPrctlHelper<storm::RationalNumber>::computeReachabilityRewards(storm::solver::SolveGoal const& goal, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::storage::SparseMatrix<storm::RationalNumber> const& backwardTransitions, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, storm::solver::MinMaxLinearEquationSolverFactory<storm::RationalNumber> const& minMaxLinearEquationSolverFactory, ModelCheckerHint const& hint);
            template std::vector<storm::RationalNumber> SparseMdpPrctlHelper<storm::RationalNumber>::computeLongRunAverageRewards(OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::storage::SparseMatrix<storm::RationalNumber> const& backwardTransitions, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::solver::MinMaxLinearEquationSolverFactory<storm::RationalNumber> const& minMaxLinearEquationSolverFactory);
-            template storm::RationalNumber SparseMdpPrctlHelper<storm::RationalNumber>::computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::MaximalEndComponent const& mec);
+            template storm::RationalNumber SparseMdpPrctlHelper<storm::RationalNumber>::computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<storm::RationalNumber> const& minMaxLinearEquationSolverFactory);
+            template storm::RationalNumber SparseMdpPrctlHelper<storm::RationalNumber>::computeLraForMaximalEndComponentVI(OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<storm::RationalNumber> const& minMaxLinearEquationSolverFactory);
+            template storm::RationalNumber SparseMdpPrctlHelper<storm::RationalNumber>::computeLraForMaximalEndComponentLP(OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::MaximalEndComponent const& mec);

 #endif
        }
--- a/src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h
+++ b/src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h
@ -75,7 +75,11 @@ namespace storm {
                static MDPSparseModelCheckingHelperReturnType<ValueType> computeReachabilityRewardsHelper(storm::solver::SolveGoal const& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, std::function<std::vector<ValueType>(uint_fast64_t, storm::storage::SparseMatrix<ValueType> const&, storm::storage::BitVector const&)> const& totalStateRewardVectorGetter, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, storm::solver::MinMaxLinearEquationSolverFactory<ValueType> const& minMaxLinearEquationSolverFactory, ModelCheckerHint const& hint = ModelCheckerHint());

                template<typename RewardModelType>
-                static ValueType computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec);
+                static ValueType computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<ValueType> const& minMaxLinearEquationSolverFactory);
+                template<typename RewardModelType>
+                static ValueType computeLraForMaximalEndComponentVI(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<ValueType> const& minMaxLinearEquationSolverFactory);
+                template<typename RewardModelType>
+                static ValueType computeLraForMaximalEndComponentLP(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec);

            };