Added new minmax method: Vi-to-Pi, which first performs value iteration with doubles, to find a good initial policy for (potentially exact) policy iteration.

8 years ago · 6e3639c8f1
8 changed files with 72 additions and 12 deletions
--- a/src/storm/settings/modules/MinMaxEquationSolverSettings.cpp
+++ b/src/storm/settings/modules/MinMaxEquationSolverSettings.cpp
@ -23,7 +23,7 @@ namespace storm {
            const std::string MinMaxEquationSolverSettings::intervalIterationSymmetricUpdatesOptionName = "symmetricupdates";

            MinMaxEquationSolverSettings::MinMaxEquationSolverSettings() : ModuleSettings(moduleName) {
-                std::vector<std::string> minMaxSolvingTechniques = {"vi", "value-iteration", "pi", "policy-iteration", "lp", "linear-programming", "rs", "ratsearch", "ii", "interval-iteration", "svi", "sound-value-iteration", "topological"};
+                std::vector<std::string> minMaxSolvingTechniques = {"vi", "value-iteration", "pi", "policy-iteration", "lp", "linear-programming", "rs", "ratsearch", "ii", "interval-iteration", "svi", "sound-value-iteration", "topological", "vi-to-pi"};
                this->addOption(storm::settings::OptionBuilder(moduleName, solvingMethodOptionName, false, "Sets which min/max linear equation solving technique is preferred.")
                                .addArgument(storm::settings::ArgumentBuilder::createStringArgument("name", "The name of a min/max linear equation solving technique.").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(minMaxSolvingTechniques)).setDefaultValueString("topological").build()).build());
                
@ -64,6 +64,8 @@ namespace storm {
                    return storm::solver::MinMaxMethod::SoundValueIteration;
                } else if (minMaxEquationSolvingTechnique == "topological") {
                    return storm::solver::MinMaxMethod::Topological;
+                } else if (minMaxEquationSolvingTechnique == "vi-to-pi") {
+                    return storm::solver::MinMaxMethod::ViToPi;
                }
                
                STORM_LOG_THROW(false, storm::exceptions::IllegalArgumentValueException, "Unknown min/max equation solving technique '" << minMaxEquationSolvingTechnique << "'.");
--- a/src/storm/settings/modules/TopologicalEquationSolverSettings.cpp
+++ b/src/storm/settings/modules/TopologicalEquationSolverSettings.cpp
@ -29,7 +29,7 @@ namespace storm {
                std::vector<std::string> linearEquationSolver = {"gmm++", "native", "eigen", "elimination"};
                this->addOption(storm::settings::OptionBuilder(moduleName, underlyingEquationSolverOptionName, true, "Sets which solver is considered for solving the underlying equation systems.")
                                .addArgument(storm::settings::ArgumentBuilder::createStringArgument("name", "The name of the used solver.").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(linearEquationSolver)).setDefaultValueString("gmm++").build()).build());
-                std::vector<std::string> minMaxSolvingTechniques = {"vi", "value-iteration", "pi", "policy-iteration", "lp", "linear-programming", "rs", "ratsearch", "ii", "interval-iteration", "svi", "sound-value-iteration"};
+                std::vector<std::string> minMaxSolvingTechniques = {"vi", "value-iteration", "pi", "policy-iteration", "lp", "linear-programming", "rs", "ratsearch", "ii", "interval-iteration", "svi", "sound-value-iteration", "vi-to-pi"};
                this->addOption(storm::settings::OptionBuilder(moduleName, underlyingMinMaxMethodOptionName, true, "Sets which minmax method is considered for solving the underlying minmax equation systems.")
                                .addArgument(storm::settings::ArgumentBuilder::createStringArgument("name", "The name of the used min max method.").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(minMaxSolvingTechniques)).setDefaultValueString("value-iteration").build()).build());
            }
@ -78,10 +78,9 @@ namespace storm {
                    return storm::solver::MinMaxMethod::IntervalIteration;
                } else if (minMaxEquationSolvingTechnique == "sound-value-iteration" || minMaxEquationSolvingTechnique == "svi") {
                    return storm::solver::MinMaxMethod::SoundValueIteration;
+                } else if (minMaxEquationSolvingTechnique == "vi-to-pi") {
+                    return storm::solver::MinMaxMethod::ViToPi;
                }
-
-                
-                
                
                STORM_LOG_THROW(false, storm::exceptions::IllegalArgumentValueException, "Unknown underlying equation solver '" << minMaxEquationSolvingTechnique << "'.");
            }
--- a/src/storm/solver/IterativeMinMaxLinearEquationSolver.cpp
+++ b/src/storm/solver/IterativeMinMaxLinearEquationSolver.cpp
@ -42,12 +42,12 @@ namespace storm {
            // Adjust the method if none was specified and we want exact or sound computations.
            auto method = env.solver().minMax().getMethod();
            
-            if (isExactMode && method != MinMaxMethod::PolicyIteration && method != MinMaxMethod::RationalSearch) {
+            if (isExactMode && method != MinMaxMethod::PolicyIteration && method != MinMaxMethod::RationalSearch && method != MinMaxMethod::ViToPi) {
                if (env.solver().minMax().isMethodSetFromDefault()) {
                    STORM_LOG_INFO("Selecting 'Policy iteration' as the solution technique to guarantee exact results. If you want to override this, please explicitly specify a different method.");
                    method = MinMaxMethod::PolicyIteration;
                } else {
-                    STORM_LOG_WARN("The selected solution method does not guarantee exact results.");
+                    STORM_LOG_WARN("The selected solution method " << toString(method) << " does not guarantee exact results.");
                }
            } else if (env.solver().isForceSoundness() && method != MinMaxMethod::SoundValueIteration && method != MinMaxMethod::IntervalIteration && method != MinMaxMethod::PolicyIteration && method != MinMaxMethod::RationalSearch) {
                if (env.solver().minMax().isMethodSetFromDefault()) {
@ -57,7 +57,7 @@ namespace storm {
                    STORM_LOG_WARN("The selected solution method does not guarantee sound results.");
                }
            }
-            STORM_LOG_THROW(method == MinMaxMethod::ValueIteration || method == MinMaxMethod::PolicyIteration || method == MinMaxMethod::RationalSearch || method == MinMaxMethod::SoundValueIteration || method == MinMaxMethod::IntervalIteration, storm::exceptions::InvalidEnvironmentException, "This solver does not support the selected method.");
+            STORM_LOG_THROW(method == MinMaxMethod::ValueIteration || method == MinMaxMethod::PolicyIteration || method == MinMaxMethod::RationalSearch || method == MinMaxMethod::SoundValueIteration || method == MinMaxMethod::IntervalIteration || method == MinMaxMethod::ViToPi, storm::exceptions::InvalidEnvironmentException, "This solver does not support the selected method.");
            return method;
        }
        
@ -80,6 +80,9 @@ namespace storm {
                case MinMaxMethod::SoundValueIteration:
                    result = solveEquationsSoundValueIteration(env, dir, x, b);
                    break;
+                case MinMaxMethod::ViToPi:
+                    result = solveEquationsViToPi(env, dir, x, b);
+                    break;
                default:
                    STORM_LOG_THROW(false, storm::exceptions::InvalidEnvironmentException, "This solver does not implement the selected solution method");
            }
@ -117,7 +120,12 @@ namespace storm {
        bool IterativeMinMaxLinearEquationSolver<ValueType>::solveEquationsPolicyIteration(Environment const& env, OptimizationDirection dir, std::vector<ValueType>& x, std::vector<ValueType> const& b) const {
            // Create the initial scheduler.
            std::vector<storm::storage::sparse::state_type> scheduler = this->hasInitialScheduler() ? this->getInitialScheduler() : std::vector<storm::storage::sparse::state_type>(this->A->getRowGroupCount());
-            
+            return performPolicyIteration(env, dir, x, b, std::move(scheduler));
+        }
+        
+        template<typename ValueType>
+        bool IterativeMinMaxLinearEquationSolver<ValueType>::performPolicyIteration(Environment const& env, OptimizationDirection dir, std::vector<ValueType>& x, std::vector<ValueType> const& b, std::vector<storm::storage::sparse::state_type>&& initialPolicy) const {
+            std::vector<storm::storage::sparse::state_type> scheduler = std::move(initialPolicy);
            // Get a vector for storing the right-hand side of the inner equation system.
            if (!auxiliaryRowGroupVector) {
                auxiliaryRowGroupVector = std::make_unique<std::vector<ValueType>>(this->A->getRowGroupCount());
@ -226,6 +234,7 @@ namespace storm {
            bool needsLinEqSolver = false;
            needsLinEqSolver |= method == MinMaxMethod::PolicyIteration;
            needsLinEqSolver |= method == MinMaxMethod::ValueIteration && (this->hasInitialScheduler() || hasInitialScheduler);
+            needsLinEqSolver |= method == MinMaxMethod::ViToPi;
            MinMaxLinearEquationSolverRequirements requirements = needsLinEqSolver ? MinMaxLinearEquationSolverRequirements(this->linearEquationSolverFactory->getRequirements(env)) : MinMaxLinearEquationSolverRequirements();

            if (method == MinMaxMethod::ValueIteration) {
@ -265,6 +274,12 @@ namespace storm {
                    requirements.requireNoEndComponents();
                }
                requirements.requireBounds(false);
+            } else if (method == MinMaxMethod::ViToPi) {
+                // Since we want to use value iteration to extract an initial scheduler, it helps to eliminate all end components first.
+                // TODO: We might get around this, as the initial value iteration scheduler is only a heuristic.
+                if (!this->hasUniqueSolution()) {
+                    requirements.requireNoEndComponents();
+                }
            } else {
                STORM_LOG_THROW(false, storm::exceptions::InvalidEnvironmentException, "Unsupported technique for iterative MinMax linear equation solver.");
            }
@ -666,6 +681,30 @@ namespace storm {
            return status == SolverStatus::Converged;
        }
        
+        template<typename ValueType>
+        bool IterativeMinMaxLinearEquationSolver<ValueType>::solveEquationsViToPi(Environment const& env, OptimizationDirection dir, std::vector<ValueType>& x, std::vector<ValueType> const& b) const {
+            // First create an (inprecise) vi solver to get a good initial strategy for the (potentially precise) policy iteration solver.
+            std::vector<storm::storage::sparse::state_type> initialSched;
+            {
+                Environment viEnv = env;
+                viEnv.solver().minMax().setMethod(MinMaxMethod::ValueIteration);
+                auto impreciseSolver = GeneralMinMaxLinearEquationSolverFactory<double>().create(viEnv, this->A->template toValueType<double>());
+                impreciseSolver->setHasUniqueSolution(this->hasUniqueSolution());
+                impreciseSolver->setTrackScheduler(true);
+                if (this->hasInitialScheduler()) {
+                    auto initSched = this->getInitialScheduler();
+                    impreciseSolver->setInitialScheduler(std::move(initSched));
+                }
+                STORM_LOG_THROW(!impreciseSolver->getRequirements(viEnv, dir).hasEnabledCriticalRequirement(), storm::exceptions::UnmetRequirementException, "The value-iteration based solver has an unmet requirement.");
+                auto xVi = storm::utility::vector::convertNumericVector<double>(x);
+                auto bVi = storm::utility::vector::convertNumericVector<double>(b);
+                impreciseSolver->solveEquations(viEnv, dir, xVi, bVi);
+                initialSched = impreciseSolver->getSchedulerChoices();
+            }
+            STORM_LOG_INFO("Found initial policy using Value Iteration. Starting Policy iteration now.");
+            return performPolicyIteration(env, dir, x, b, std::move(initialSched));
+        }
+        
        template<typename ValueType>
        bool IterativeMinMaxLinearEquationSolver<ValueType>::isSolution(storm::OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& matrix, std::vector<ValueType> const& values, std::vector<ValueType> const& b) {
            storm::utility::ConstantsComparator<ValueType> comparator;
--- a/src/storm/solver/IterativeMinMaxLinearEquationSolver.h
+++ b/src/storm/solver/IterativeMinMaxLinearEquationSolver.h
@ -36,11 +36,13 @@ namespace storm {
            
            bool solveInducedEquationSystem(Environment const& env, std::unique_ptr<LinearEquationSolver<ValueType>>& linearEquationSolver, std::vector<uint64_t> const& scheduler, std::vector<ValueType>& x, std::vector<ValueType>& subB, std::vector<ValueType> const& originalB) const;
            bool solveEquationsPolicyIteration(Environment const& env, OptimizationDirection dir, std::vector<ValueType>& x, std::vector<ValueType> const& b) const;
+            bool performPolicyIteration(Environment const& env, OptimizationDirection dir, std::vector<ValueType>& x, std::vector<ValueType> const& b, std::vector<storm::storage::sparse::state_type>&& initialPolicy) const;
            bool valueImproved(OptimizationDirection dir, ValueType const& value1, ValueType const& value2) const;

            bool solveEquationsValueIteration(Environment const& env, OptimizationDirection dir, std::vector<ValueType>& x, std::vector<ValueType> const& b) const;
            bool solveEquationsIntervalIteration(Environment const& env, OptimizationDirection dir, std::vector<ValueType>& x, std::vector<ValueType> const& b) const;
            bool solveEquationsSoundValueIteration(Environment const& env, OptimizationDirection dir, std::vector<ValueType>& x, std::vector<ValueType> const& b) const;
+            bool solveEquationsViToPi(Environment const& env, OptimizationDirection dir, std::vector<ValueType>& x, std::vector<ValueType> const& b) const;

            bool solveEquationsRationalSearch(Environment const& env, OptimizationDirection dir, std::vector<ValueType>& x, std::vector<ValueType> const& b) const;
            
--- a/src/storm/solver/MinMaxLinearEquationSolver.cpp
+++ b/src/storm/solver/MinMaxLinearEquationSolver.cpp
@ -192,7 +192,7 @@ namespace storm {
        std::unique_ptr<MinMaxLinearEquationSolver<ValueType>> GeneralMinMaxLinearEquationSolverFactory<ValueType>::create(Environment const& env) const {
            std::unique_ptr<MinMaxLinearEquationSolver<ValueType>> result;
            auto method = env.solver().minMax().getMethod();
-            if (method == MinMaxMethod::ValueIteration || method == MinMaxMethod::PolicyIteration || method == MinMaxMethod::RationalSearch || method == MinMaxMethod::IntervalIteration || method == MinMaxMethod::SoundValueIteration) {
+            if (method == MinMaxMethod::ValueIteration || method == MinMaxMethod::PolicyIteration || method == MinMaxMethod::RationalSearch || method == MinMaxMethod::IntervalIteration || method == MinMaxMethod::SoundValueIteration || method == MinMaxMethod::ViToPi) {
                result = std::make_unique<IterativeMinMaxLinearEquationSolver<ValueType>>(std::make_unique<GeneralLinearEquationSolverFactory<ValueType>>());
            } else if (method == MinMaxMethod::Topological) {
                result = std::make_unique<TopologicalMinMaxLinearEquationSolver<ValueType>>();
@ -211,7 +211,7 @@ namespace storm {
        std::unique_ptr<MinMaxLinearEquationSolver<storm::RationalNumber>> GeneralMinMaxLinearEquationSolverFactory<storm::RationalNumber>::create(Environment const& env) const {
            std::unique_ptr<MinMaxLinearEquationSolver<storm::RationalNumber>> result;
            auto method = env.solver().minMax().getMethod();
-            if (method == MinMaxMethod::ValueIteration || method == MinMaxMethod::PolicyIteration || method == MinMaxMethod::RationalSearch || method == MinMaxMethod::IntervalIteration || method == MinMaxMethod::SoundValueIteration) {
+            if (method == MinMaxMethod::ValueIteration || method == MinMaxMethod::PolicyIteration || method == MinMaxMethod::RationalSearch || method == MinMaxMethod::IntervalIteration || method == MinMaxMethod::SoundValueIteration || method == MinMaxMethod::ViToPi) {
                result = std::make_unique<IterativeMinMaxLinearEquationSolver<storm::RationalNumber>>(std::make_unique<GeneralLinearEquationSolverFactory<storm::RationalNumber>>());
            } else if (method == MinMaxMethod::LinearProgramming) {
                result = std::make_unique<LpMinMaxLinearEquationSolver<storm::RationalNumber>>(std::make_unique<storm::utility::solver::LpSolverFactory<storm::RationalNumber>>());
--- a/src/storm/solver/SolverSelectionOptions.cpp
+++ b/src/storm/solver/SolverSelectionOptions.cpp
@ -20,6 +20,8 @@ namespace storm {
                    return "soundvalueiteration";
                case MinMaxMethod::TopologicalCuda:
                    return "topologicalcuda";
+                case MinMaxMethod::ViToPi:
+                    return "vi-to-pi";
            }
            return "invalid";
        }
--- a/src/storm/solver/SolverSelectionOptions.h
+++ b/src/storm/solver/SolverSelectionOptions.h
@ -6,7 +6,7 @@

 namespace storm {
    namespace solver {
-        ExtendEnumsWithSelectionField(MinMaxMethod, PolicyIteration, ValueIteration, LinearProgramming, Topological, RationalSearch, IntervalIteration, SoundValueIteration, TopologicalCuda)
+        ExtendEnumsWithSelectionField(MinMaxMethod, PolicyIteration, ValueIteration, LinearProgramming, Topological, RationalSearch, IntervalIteration, SoundValueIteration, TopologicalCuda, ViToPi)
        ExtendEnumsWithSelectionField(MultiplierType, Native, Gmmxx)
        ExtendEnumsWithSelectionField(GameMethod, PolicyIteration, ValueIteration)
        ExtendEnumsWithSelectionField(LraMethod, LinearProgramming, ValueIteration)
--- a/src/test/storm/modelchecker/MdpPrctlModelCheckerTest.cpp
+++ b/src/test/storm/modelchecker/MdpPrctlModelCheckerTest.cpp
@ -157,6 +157,21 @@ namespace {
            return env;
        }
    };
+    
+    class SparseRationalViToPiEnvironment {
+    public:
+        static const storm::dd::DdType ddType = storm::dd::DdType::Sylvan; // Unused for sparse models
+        static const MdpEngine engine = MdpEngine::PrismSparse;
+        static const bool isExact = true;
+        typedef storm::RationalNumber ValueType;
+        typedef storm::models::sparse::Mdp<ValueType> ModelType;
+        static storm::Environment createEnvironment() {
+            storm::Environment env;
+            env.solver().minMax().setMethod(storm::solver::MinMaxMethod::ViToPi);
+            return env;
+        }
+    };
+    
    class SparseRationalRationalSearchEnvironment {
    public:
        static const storm::dd::DdType ddType = storm::dd::DdType::Sylvan; // Unused for sparse models
@ -406,6 +421,7 @@ namespace {
            SparseDoubleTopologicalValueIterationEnvironment,
            SparseDoubleTopologicalSoundValueIterationEnvironment,
            SparseRationalPolicyIterationEnvironment,
+            SparseRationalViToPiEnvironment,
            SparseRationalRationalSearchEnvironment,
            HybridCuddDoubleValueIterationEnvironment,
            HybridSylvanDoubleValueIterationEnvironment,