policies for games

Former-commit-id: 8bfb325b60
10 years ago · 87c8241ec7
8 changed files with 167 additions and 8 deletions
--- a/src/modelchecker/region/ApproximationModel.cpp
+++ b/src/modelchecker/region/ApproximationModel.cpp
@ -277,8 +277,6 @@ namespace storm {
                //TODO: at this point, set policy to the one stored in the region.
                invokeSolver(computeLowerBounds, policy);
                
-                //TODO: policy for games.
-                if(policy.empty()) return this->solverData.result[this->solverData.initialStateIndex]; //This can be deleted as soon as policy for games is supported
                //TODO: (maybe) when a few parameters are mapped to another value, build a "nicer" scheduler and check whether it induces values that are more optimal.
                //Get the set of parameters which are (according to the policy) always mapped to the same region boundary.
                //First, collect all (relevant) parameters, i.e., the ones that are set to the lower or upper boundary.
@ -403,7 +401,10 @@ namespace storm {
            void ApproximationModel<storm::models::sparse::Mdp<storm::RationalFunction>, double>::invokeSolver(bool computeLowerBounds, Policy& policy){
                storm::solver::SolveGoal player2Goal(computeLowerBounds);
                std::unique_ptr<storm::solver::GameSolver<double>> solver = storm::utility::solver::GameSolverFactory<double>().create(this->solverData.player1Matrix, this->matrixData.matrix);
-                solver->solveGame(this->solverData.player1Goal.direction(), player2Goal.direction(), this->solverData.result, this->vectorData.vector);
+                solver->setPolicyTracking();
+                solver->solveGame(this->solverData.player1Goal.direction(), player2Goal.direction(), this->solverData.result, this->vectorData.vector, &this->solverData.lastPlayer1Policy, &policy);
+                this->solverData.lastPlayer1Policy = solver->getPlayer1Policy();
+                policy = solver->getPlayer2Policy();
            }
            
                
--- a/src/solver/AbstractGameSolver.cpp
+++ b/src/solver/AbstractGameSolver.cpp
@ -18,5 +18,19 @@ namespace storm {
        AbstractGameSolver::AbstractGameSolver(double precision, uint_fast64_t maximalNumberOfIterations, bool relative) : precision(precision), maximalNumberOfIterations(maximalNumberOfIterations), relative(relative) {
            // Intentionally left empty.
        }
+        
+        void AbstractGameSolver::setPolicyTracking(bool setToTrue) {
+            this->trackPolicies = setToTrue;
+        }
+        
+        std::vector<storm::storage::sparse::state_type> AbstractGameSolver::getPlayer1Policy() const {
+            assert(!this->player1Policy.empty());
+            return player1Policy;
+        }
+        
+        std::vector<storm::storage::sparse::state_type> AbstractGameSolver::getPlayer2Policy() const {
+            assert(!this->player2Policy.empty());
+            return player2Policy;
+        }
    }
 }
--- a/src/solver/AbstractGameSolver.h
+++ b/src/solver/AbstractGameSolver.h
@ -2,6 +2,8 @@
 #define STORM_SOLVER_ABSTRACTGAMESOLVER_H_

 #include <cstdint>
+#include "src/storage/sparse/StateType.h"
+#include "src/utility/vector.h"

 namespace storm {
    namespace solver {
@ -24,6 +26,11 @@ namespace storm {
             */
            AbstractGameSolver(double precision, uint_fast64_t maximalNumberOfIterations, bool relative);
            
+            void setPolicyTracking(bool setToTrue=true);
+            
+            std::vector<storm::storage::sparse::state_type> getPlayer1Policy() const;
+            std::vector<storm::storage::sparse::state_type> getPlayer2Policy() const;
+
        protected:
            // The precision to achieve.
            double precision;
@ -33,6 +40,14 @@ namespace storm {

            // A flag indicating whether a relative or an absolute stopping criterion is to be used.
            bool relative;
+            
+            // Whether we track the policies we generate.
+            bool trackPolicies;
+            
+            // The policies for the different players
+            mutable std::vector<storm::storage::sparse::state_type> player1Policy;
+            mutable std::vector<storm::storage::sparse::state_type> player2Policy;
+            
        };
    }
 }
--- a/src/solver/GameSolver.cpp
+++ b/src/solver/GameSolver.cpp
@ -1,7 +1,8 @@
 #include "src/solver/GameSolver.h"

+#include "src/solver/LinearEquationSolver.h"
+#include "src/utility/solver.h"
 #include "src/storage/SparseMatrix.h"
-
 #include "src/utility/vector.h"

 namespace storm {
@ -17,10 +18,30 @@ namespace storm {
        }

        template <typename ValueType>
-        void GameSolver<ValueType>::solveGame(OptimizationDirection player1Goal, OptimizationDirection player2Goal, std::vector<ValueType>& x, std::vector<ValueType> const& b) const {
-            // Set up the environment for value iteration.
+        void GameSolver<ValueType>::solveGame(OptimizationDirection player1Goal, OptimizationDirection player2Goal, std::vector<ValueType>& x, std::vector<ValueType> const& b, std::vector<storm::storage::sparse::state_type>* initialPlayer1Policy, std::vector<storm::storage::sparse::state_type>* initialPlayer2Policy) const {
            uint_fast64_t numberOfPlayer1States = x.size();
            
+            if(initialPlayer1Policy != nullptr || initialPlayer2Policy != nullptr){
+                //Either we work with both policies or none of them
+                assert(initialPlayer1Policy != nullptr && initialPlayer2Policy != nullptr);
+                //The policies select certain rows in the matrix of player2.
+                //However, note that rows can be selected more then once and in an arbitrary order.
+                std::vector<storm::storage::sparse::state_type> selectedRows(numberOfPlayer1States);
+                for (uint_fast64_t pl1State = 0; pl1State < numberOfPlayer1States; ++pl1State){
+                    auto const& pl1Choice = player1Matrix.getRow(player1Matrix.getRowGroupIndices()[pl1State] + (*initialPlayer1Policy)[pl1State]);
+                    assert(pl1Choice.getNumberOfEntries()==1);
+                    uint_fast64_t pl2State = pl1Choice.begin()->getColumn();
+                    selectedRows[pl1State] = player2Matrix.getRowGroupIndices()[pl2State] + (*initialPlayer2Policy)[pl2State];
+                }
+                storm::storage::SparseMatrix<ValueType> eqSysMatrix = player2Matrix.selectRowsFromRowIndexSequence(selectedRows, true);
+                eqSysMatrix.convertToEquationSystem();
+                std::unique_ptr<storm::solver::LinearEquationSolver<ValueType>> solver = storm::utility::solver::LinearEquationSolverFactory<ValueType>().create(eqSysMatrix);
+                std::vector<ValueType> subB(numberOfPlayer1States);
+                storm::utility::vector::selectVectorValues<ValueType>(subB, selectedRows, b);
+                solver->solveEquationSystem(x, subB);
+            }
+            
+            // Set up the environment for value iteration.
            bool converged = false;
            std::vector<ValueType> tmpResult(numberOfPlayer1States);
            std::vector<ValueType> nondetResult(player2Matrix.getRowCount());
@ -69,6 +90,44 @@ namespace storm {

                ++iterations;
            } while (!converged && iterations < maximalNumberOfIterations);
+            
+            STORM_LOG_WARN_COND(converged, "Iterative solver for stochastic two player games did not converge after " << iterations << "iterations.");
+            
+            if(this->trackPolicies){
+                this->player2Policy = std::vector<storm::storage::sparse::state_type>(player2Matrix.getRowGroupCount());
+                storm::utility::vector::reduceVectorMinOrMax(player2Goal, nondetResult, player2Result, player2Matrix.getRowGroupIndices(), &this->player2Policy);
+
+                this->player1Policy = std::vector<storm::storage::sparse::state_type>(numberOfPlayer1States, 0);
+                for (uint_fast64_t pl1State = 0; pl1State < numberOfPlayer1States; ++pl1State) {
+                    storm::storage::SparseMatrix<storm::storage::sparse::state_type>::const_rows relevantRows = player1Matrix.getRowGroup(pl1State);
+                    if (relevantRows.getNumberOfEntries() > 0) {
+                        storm::storage::SparseMatrix<storm::storage::sparse::state_type>::const_iterator it = relevantRows.begin();
+                        storm::storage::SparseMatrix<storm::storage::sparse::state_type>::const_iterator ite = relevantRows.end();
+                        // Set the first value.
+                        tmpResult[pl1State] = player2Result[it->getColumn()];
+                        ++it;
+                        storm::storage::sparse::state_type localChoice = 1;
+                        // Now iterate through the different values and pick the extremal one.
+                        if (player1Goal == OptimizationDirection::Minimize) {
+                            for (; it != ite; ++it, ++localChoice) {
+                                if(player2Result[it->getColumn()] < tmpResult[pl1State]){
+                                    tmpResult[pl1State] = player2Result[it->getColumn()];
+                                    this->player1Policy[pl1State] = localChoice;
+                                }
+                            }
+                        } else {
+                            for (; it != ite; ++it, ++localChoice) {
+                                if(player2Result[it->getColumn()] > tmpResult[pl1State]){
+                                    tmpResult[pl1State] = player2Result[it->getColumn()];
+                                    this->player1Policy[pl1State] = localChoice;
+                                }
+                            }
+                        }
+                    } else {
+                        STORM_LOG_ERROR("There is no choice for Player 1 at state " << pl1State << " in the stochastic two player game. This is not expected!");
+                    }
+                }
+            }
        }

        template class GameSolver<double>;
--- a/src/solver/GameSolver.h
+++ b/src/solver/GameSolver.h
@ -45,9 +45,11 @@ namespace storm {
             * @param player2Goal Sets whether player 2 wants to minimize or maximize.
             * @param x The initial guess of the solution.
             * @param b The vector to add after matrix-vector multiplication.
+             * @param initialPlayer1Policy A policy that selects rows in every rowgroup of player1. This will be used as an initial guess
+             * @param initialPlayer2Policy A policy that selects rows in every rowgroup of player2. This will be used as an initial guess
             * @return The solution vector in the for of the vector x.
             */
-            virtual void solveGame(OptimizationDirection player1Goal, OptimizationDirection player2Goal, std::vector<ValueType>& x, std::vector<ValueType> const& b) const;
+            virtual void solveGame(OptimizationDirection player1Goal, OptimizationDirection player2Goal, std::vector<ValueType>& x, std::vector<ValueType> const& b, std::vector<storm::storage::sparse::state_type>* initialPlayer1Policy = nullptr, std::vector<storm::storage::sparse::state_type>* initialPlayer2Policy = nullptr) const;

        private:
            // The matrix defining the choices of player 1.
@ -55,6 +57,7 @@ namespace storm {

            // The matrix defining the choices of player 2.
            storm::storage::SparseMatrix<ValueType> const& player2Matrix;
+            
        };
    }
 }
--- a/src/storage/SparseMatrix.cpp
+++ b/src/storage/SparseMatrix.cpp
@ -718,6 +718,48 @@ namespace storm {
            return matrixBuilder.build();
        }
        
+        template<typename ValueType>
+        SparseMatrix<ValueType> SparseMatrix<ValueType>::selectRowsFromRowIndexSequence(std::vector<index_type> const& rowIndexSequence, bool insertDiagonalEntries) const{
+            // First, we need to count how many non-zero entries the resulting matrix will have and reserve space for
+            // diagonal entries if requested.
+            index_type newEntries = 0;
+            for(index_type row = 0, rowEnd = rowIndexSequence.size(); row < rowEnd; ++row) {
+                bool foundDiagonalElement = false;
+                for (const_iterator it = this->begin(rowIndexSequence[row]), ite = this->end(rowIndexSequence[row]); it != ite; ++it) {
+                    if (it->getColumn() == row) {
+                        foundDiagonalElement = true;
+                    }
+                    ++newEntries;
+                }
+                if (insertDiagonalEntries && !foundDiagonalElement) {
+                    ++newEntries;
+                }
+            }
+            
+            // Now create the matrix to be returned with the appropriate size.
+            SparseMatrixBuilder<ValueType> matrixBuilder(rowIndexSequence.size(), columnCount, newEntries);
+            
+            // Copy over the selected rows from the source matrix.
+            for(index_type row = 0, rowEnd = rowIndexSequence.size(); row < rowEnd; ++row) {
+                bool insertedDiagonalElement = false;
+                for (const_iterator it = this->begin(rowIndexSequence[row]), ite = this->end(rowIndexSequence[row]); it != ite; ++it) {
+                    if (it->getColumn() == row) {
+                        insertedDiagonalElement = true;
+                    } else if (insertDiagonalEntries && !insertedDiagonalElement && it->getColumn() > row) {
+                        matrixBuilder.addNextValue(row, row, storm::utility::zero<ValueType>());
+                        insertedDiagonalElement = true;
+                    }
+                    matrixBuilder.addNextValue(row, it->getColumn(), it->getValue());
+                }
+                if (insertDiagonalEntries && !insertedDiagonalElement) {
+                    matrixBuilder.addNextValue(row, row, storm::utility::zero<ValueType>());
+                }
+            }
+            
+            // Finally create matrix and return result.
+            return matrixBuilder.build();
+        }
+        
        template <typename ValueType>
        SparseMatrix<ValueType> SparseMatrix<ValueType>::transpose(bool joinGroups, bool keepZeros) const {
            index_type rowCount = this->getColumnCount();
--- a/src/storage/SparseMatrix.h
+++ b/src/storage/SparseMatrix.h
@ -633,6 +633,17 @@ namespace storm {
             */
            SparseMatrix selectRowsFromRowGroups(std::vector<index_type> const& rowGroupToRowIndexMapping, bool insertDiagonalEntries = true) const;
            
+            /*!
+             * Selects the rows that are given by the sequence of row indices, allowing to select rows arbitrarily often and with an arbitrary order
+             * The resulting matrix will have a trivial row grouping
+             *
+             * @param rowIndexSequence the sequence of row indices which specifies, which rows are contained in the new matrix
+             * @param insertDiagonalEntries If set to true, the resulting matrix will have zero entries in column i for
+             * each row. This can then be used for inserting other values later.
+             * @return A matrix which rows are selected from this matrix according to the given index sequence
+             */
+            SparseMatrix selectRowsFromRowIndexSequence(std::vector<index_type> const& rowIndexSequence, bool insertDiagonalEntries = true) const;
+            
            /*!
             * Transposes the matrix.
             *
--- a/src/utility/vector.h
+++ b/src/utility/vector.h
@ -149,6 +149,20 @@ namespace storm {
                }
            }
            
+            /*!
+             * Selects values from a vector at the specified sequence of indices and writes them into another vector
+             *
+             * @param vector The vector into which the selected elements are written.
+             * @param indexSequence a sequence of indices at which the desired values can be found
+             * @param values the values from which to select
+             */
+            template<class T>
+            void selectVectorValues(std::vector<T>& vector, std::vector<uint_fast64_t> const& indexSequence, std::vector<T> const& values) {
+                for (uint_fast64_t vectorIndex = 0; vectorIndex < vector.size(); ++vectorIndex){
+                    vector[vectorIndex] = values[indexSequence[vectorIndex]];
+                }
+            }
+            
            /*!
             * Selects values from a vector at the specified positions and writes them into another vector as often as given by
             * the size of the corresponding group of elements.