Work on policy recycling

Former-commit-id: b8e94254ce
10 years ago · 9c645ba24b
9 changed files with 500 additions and 127 deletions
--- a/src/modelchecker/region/ApproximationModel.cpp
+++ b/src/modelchecker/region/ApproximationModel.cpp
@ -67,13 +67,13 @@ namespace storm {
                this->vectorData.assignment.shrink_to_fit();
                if(parametricModel.getType()==storm::models::ModelType::Mdp){
                    initializePlayer1Matrix(parametricModel);
+                    this->solverData.lastPlayer1Policy = Policy(this->solverData.player1Matrix.getRowGroupCount(), 0);
                }
                
                this->solverData.result = std::vector<ConstantType>(maybeStates.getNumberOfSetBits(), this->computeRewards ? storm::utility::one<ConstantType>() : ConstantType(0.5));
                this->solverData.initialStateIndex = newIndices[initialState];
                this->solverData.lastMinimizingPolicy = Policy(this->matrixData.matrix.getRowGroupCount(), 0);
                this->solverData.lastMaximizingPolicy = Policy(this->matrixData.matrix.getRowGroupCount(), 0);
-                this->solverData.lastPlayer1Policy = Policy(this->matrixData.matrix.getRowGroupCount(), 0);
            }                

            template<typename ParametricSparseModelType, typename ConstantType>
@ -325,9 +325,9 @@ namespace storm {
                }
       //         std::cout << "Used Approximation" << std::endl;
                for (auto const& varcount : VarCount){
-                    if(varcount.second.first > 0 && varcount.second.second > 0){
-              //          std::cout << "  Variable " << varcount.first << " has been set to lower " << varcount.second.first << " times and to upper " << varcount.second.second << " times. (total: " << substitutionCount << ")" << std::endl;
-                    }
+                  //  if(varcount.second.first > 0 && varcount.second.second > 0){
+          //              std::cout << "  Variable " << varcount.first << " has been set to lower " << varcount.second.first << " times and to upper " << varcount.second.second << " times. (total: " << substitutionCount << ")" << std::endl;
+                 //   }
                }
                for (auto const& fixVar : fixedVariables){
                    //std::cout << "  APPROXMODEL: variable " << fixVar.first << " is always mapped to " << fixVar.second << std::endl;
--- a/src/solver/AbstractGameSolver.cpp
+++ b/src/solver/AbstractGameSolver.cpp
@ -32,5 +32,14 @@ namespace storm {
            assert(!this->player2Policy.empty());
            return player2Policy;
        }
+        
+        double AbstractGameSolver::getPrecision() const {
+            return precision;
+        }
+        
+        bool AbstractGameSolver::getRelative() const {
+            return relative;
+        }
+        
    }
 }
--- a/src/solver/AbstractGameSolver.h
+++ b/src/solver/AbstractGameSolver.h
@ -30,6 +30,8 @@ namespace storm {
            
            std::vector<storm::storage::sparse::state_type> getPlayer1Policy() const;
            std::vector<storm::storage::sparse::state_type> getPlayer2Policy() const;
+            double getPrecision() const;
+            bool getRelative() const;

        protected:
            // The precision to achieve.
--- a/src/solver/MinMaxLinearEquationSolver.cpp
+++ b/src/solver/MinMaxLinearEquationSolver.cpp
@ -27,5 +27,14 @@ namespace storm {
            assert(!policy.empty());
            return policy;
        }
+        
+        double AbstractMinMaxLinearEquationSolver::getPrecision() const {
+            return precision;
+        }
+        
+        bool AbstractMinMaxLinearEquationSolver::getRelative() const {
+            return relative;
+        }
+        
    }
 }
--- a/src/solver/MinMaxLinearEquationSolver.h
+++ b/src/solver/MinMaxLinearEquationSolver.h
@ -27,6 +27,9 @@ namespace storm {
            
            std::vector<storm::storage::sparse::state_type> getPolicy() const;
            
+            double getPrecision() const;
+            bool getRelative() const;
+            
            void setOptimizationDirection(OptimizationDirection d) {
                direction = convert(d);
            }
--- a/src/storage/SparseMatrix.cpp
+++ b/src/storage/SparseMatrix.cpp
@ -999,6 +999,15 @@ namespace storm {
        }
 #endif
        
+        template<typename ValueType>
+        ValueType SparseMatrix<ValueType>::multiplyRowWithVector(index_type row, std::vector<ValueType> const& vector) const {
+            ValueType result = storm::utility::zero<ValueType>();
+            for(auto const& entry : this->getRow(row)){
+                result += entry.getValue() * vector[entry.getColumn()];
+            }
+            return result;
+        }
+        
        template<typename ValueType>
        void SparseMatrix<ValueType>::performSuccessiveOverRelaxationStep(ValueType omega, std::vector<ValueType>& x, std::vector<ValueType> const& b) const {
            const_iterator it = this->begin();
--- a/src/storage/SparseMatrix.h
+++ b/src/storage/SparseMatrix.h
@ -706,6 +706,15 @@ namespace storm {
             */
            void multiplyWithVector(std::vector<value_type> const& vector, std::vector<value_type>& result) const;
            
+            /*!
+             * Multiplies a single row of the matrix with the given vector and returns the result
+             *
+             * @param row The index of the row with which to multiply
+             * @param vector The vector with which to multiply the row.
+             * @return the result of the multiplication.
+             */
+            value_type multiplyRowWithVector(index_type row, std::vector<value_type> const& vector) const;
+
            /*!
             * Multiplies the vector to the matrix from the left and writes the result to the given result vector.
             *
--- a/src/utility/policyguessing.cpp
+++ b/src/utility/policyguessing.cpp
@ -8,12 +8,11 @@

 #include "src/utility/policyguessing.h"

-#include "src/storage/SparseMatrix.h"
 #include "src/utility/macros.h"
 #include "src/utility/solver.h"
 #include "src/solver/LinearEquationSolver.h"
-#include "src/solver/GameSolver.h"
 #include "graph.h"
+#include "ConstantsComparator.h"

 namespace storm {
    namespace utility{
@ -29,25 +28,121 @@ namespace storm {
                            std::vector<storm::storage::sparse::state_type>& player2Policy,
                            storm::storage::BitVector const& targetChoices,
                            ValueType const& prob0Value
-                     ){
+                    ){
+                
+             //               std::vector<storm::storage::sparse::state_type> pl1Policy = player1Policy;
+             //               std::vector<storm::storage::sparse::state_type> pl2Policy = player2Policy;
+                storm::storage::SparseMatrix<ValueType> inducedA;
+                std::vector<ValueType> inducedB;
+                storm::storage::BitVector probGreater0States;
+                getInducedEquationSystem(solver, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States);
+                
+                solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
+                
+                solver.setPolicyTracking();
+                bool resultCorrect = false;
+                while(!resultCorrect){
+                    solver.solveGame(player1Goal, player2Goal, x, b);
+                    player1Policy = solver.getPlayer1Policy();
+                    player2Policy = solver.getPlayer2Policy();
+                    
+                    //Check if the policies makes choices that lead to states from which no target state is reachable ("prob0"-states). 
+                    getInducedEquationSystem(solver, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States);
+                    resultCorrect = checkAndFixPolicy(solver, x, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States);
+                    if(!resultCorrect){
+                        //If the policy could not be fixed, it indicates that our guessed values were to high.
+                        STORM_LOG_WARN("Policies could not be fixed. Restarting Gamesolver. ");
+                        solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
+                        //x = std::vector<ValueType>(x.size(), storm::utility::zero<ValueType>());
+                    }
+                }
+                /*
+                        std::size_t p2Precount=0;
+                        std::size_t p2Postcount=0;
+                        std::size_t p1diff =0;
+                        std::size_t p2diff =0;
+                        std::size_t p2RelevantCount=0;
+                        storm::storage::BitVector relevantP2Groups(pl2Policy.size(),false);
+                        for(std::size_t i = 0; i<pl1Policy.size(); ++i){
+                            if(pl1Policy[i] != player1Policy[i]){
+                                ++p1diff;
+                            }
+                            std::size_t row = solver.getPlayer1Matrix().getRowGroupIndices()[i] + player1Policy[i];
+                            auto rowObj = solver.getPlayer1Matrix().getRow(row);
+                            relevantP2Groups.set(rowObj.begin()->getColumn());
+                        }
+                        for (std::size_t i : relevantP2Groups){
+                            if(pl2Policy[i] != player2Policy[i]){
+                                ++p2RelevantCount;
+                            }
+                        }
+                        for(std::size_t i = 0; i<pl2Policy.size(); ++i){
+                            if(pl2Policy[i] != player2Policy[i]){
+                                ++p2diff;
+                            }
+                            p2Precount += pl2Policy[i];
+                            p2Postcount += player2Policy[i];
+                        }
+                        std::cout << "P1: " << (player1Goal == OptimizationDirection::Minimize ? "MIN " : "MAX ");
+                        std::cout << "P2: " << (player2Goal == OptimizationDirection::Minimize ? "MIN " : "MAX ");
+                        std::cout << "Changes: P1: " << p1diff;
+                        std::cout << " P2: " << p2diff << " (" << p2RelevantCount << " relevant)";
+                        std::cout << " Counts P2: " << p2Precount << " and " << p2Postcount << ".";
+                        std::cout << std::endl;
+                  */      
+                
+                
+            }
+            
+            template <typename ValueType>
+            void solveMinMaxLinearEquationSystem( storm::solver::MinMaxLinearEquationSolver<ValueType>& solver,
+                                    std::vector<ValueType>& x,
+                                    std::vector<ValueType> const& b,
+                                    OptimizationDirection goal,
+                                    std::vector<storm::storage::sparse::state_type>& policy,
+                                    storm::storage::BitVector const& targetChoices,
+                                    ValueType const& prob0Value
+                                ){
+                storm::storage::SparseMatrix<ValueType> inducedA;
+                std::vector<ValueType> inducedB;
+                storm::storage::BitVector probGreater0States;
+                getInducedEquationSystem(solver, b, policy, targetChoices, inducedA, inducedB, probGreater0States);
+                solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
                
-                solveInducedEquationSystem(solver, x, b, player1Policy, player2Policy, targetChoices, prob0Value);
                solver.setPolicyTracking();
-                solver.solveGame(player1Goal, player2Goal, x, b);
-                player1Policy = solver.getPlayer1Policy();
-                player2Policy = solver.getPlayer2Policy();
+                bool resultCorrect = false;
+                while(!resultCorrect){
+                    solver.solveEquationSystem(goal, x, b);
+                    policy = solver.getPolicy();
+                    
+                    //Check if the policy makes choices that lead to states from which no target state is reachable ("prob0"-states). 
+                    getInducedEquationSystem(solver, b, policy, targetChoices, inducedA, inducedB, probGreater0States);
+                    resultCorrect = checkAndFixPolicy(solver, x, b, policy, targetChoices, inducedA, inducedB, probGreater0States);
+                    
+                    if(!resultCorrect){
+                        //If the policy could not be fixed, it indicates that our guessed values were to high.
+                        STORM_LOG_WARN("Policy could not be fixed. Restarting MinMaxsolver." );
+                        solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
+                        //x = std::vector<ValueType>(x.size(), storm::utility::zero<ValueType>());
+                    }
+                }
+                
+                
            }
            
            
+            
            template <typename ValueType>
-            void solveInducedEquationSystem(storm::solver::GameSolver<ValueType> const& solver,
-                                            std::vector<ValueType>& x,
+            void getInducedEquationSystem(storm::solver::GameSolver<ValueType> const& solver,
                                            std::vector<ValueType> const& b,
                                            std::vector<storm::storage::sparse::state_type> const& player1Policy, 
                                            std::vector<storm::storage::sparse::state_type> const& player2Policy,
                                            storm::storage::BitVector const& targetChoices,
-                                            ValueType const& prob0Value){
-                uint_fast64_t numberOfPlayer1States = x.size();
+                                            storm::storage::SparseMatrix<ValueType>& inducedA,
+                                            std::vector<ValueType>& inducedB,
+                                            storm::storage::BitVector& probGreater0States
+                                        ){
+                uint_fast64_t numberOfPlayer1States = solver.getPlayer1Matrix().getRowGroupCount();
                
                //Get the rows of the player2matrix that are selected by the policies
                //Note that rows can be selected more then once and in an arbitrary order.
@ -58,10 +153,9 @@ namespace storm {
                    uint_fast64_t pl2State = pl1Row.begin()->getColumn();
                    selectedRows[pl1State] = solver.getPlayer2Matrix().getRowGroupIndices()[pl2State] + player2Policy[pl2State];
                }
-                
                //Get the matrix A, vector b, and the targetStates induced by this selection
-                storm::storage::SparseMatrix<ValueType> inducedA = solver.getPlayer2Matrix().selectRowsFromRowIndexSequence(selectedRows, false);
-                std::vector<ValueType> inducedB(numberOfPlayer1States);
+                inducedA = solver.getPlayer2Matrix().selectRowsFromRowIndexSequence(selectedRows, false);
+                inducedB = std::vector<ValueType>(numberOfPlayer1States);
                storm::utility::vector::selectVectorValues<ValueType>(inducedB, selectedRows, b);
                storm::storage::BitVector inducedTarget(numberOfPlayer1States, false);
                for (uint_fast64_t pl1State = 0; pl1State < numberOfPlayer1States; ++pl1State){
@ -69,57 +163,25 @@ namespace storm {
                        inducedTarget.set(pl1State);
                    }
                }
-                
                //Find the states from which no target state is reachable.
-                //Note that depending on the policies, qualitative properties might have changed which makes this step necessary.
-                storm::storage::BitVector probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(numberOfPlayer1States, true), inducedTarget);
-                
-                //Get the final A,x, and b and invoke linear equation solver
-                storm::storage::SparseMatrix<ValueType> subA = inducedA.getSubmatrix(true, probGreater0States, probGreater0States, true);
-                subA.convertToEquationSystem();
-                std::vector<ValueType> subX(probGreater0States.getNumberOfSetBits());
-                storm::utility::vector::selectVectorValues(subX, probGreater0States, x);
-                std::vector<ValueType> subB(probGreater0States.getNumberOfSetBits());
-                storm::utility::vector::selectVectorValues(subB, probGreater0States, inducedB);
-                std::unique_ptr<storm::solver::LinearEquationSolver<ValueType>> linEqSysSolver = storm::utility::solver::LinearEquationSolverFactory<ValueType>().create(subA);
-                linEqSysSolver->solveEquationSystem(subX, subB);
-            
-                //fill in the result
-                storm::utility::vector::setVectorValues(x, probGreater0States, subX);
-                storm::utility::vector::setVectorValues(x, (~probGreater0States), prob0Value);
-            }
-            
-            
-            template <typename ValueType>
-            void solveMinMaxLinearEquationSystem( storm::solver::MinMaxLinearEquationSolver<ValueType>& solver,
-                                    std::vector<ValueType>& x,
-                                    std::vector<ValueType> const& b,
-                                    OptimizationDirection goal,
-                                    std::vector<storm::storage::sparse::state_type>& policy,
-                                    storm::storage::BitVector const& targetChoices,
-                                    ValueType const& prob0Value
-                                ){
-                
-                solveInducedEquationSystem(solver, x, b, policy, targetChoices, prob0Value);
-                solver.setPolicyTracking();
-                solver.solveEquationSystem(goal, x, b);
-                policy = solver.getPolicy();
+                probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(numberOfPlayer1States, true), inducedTarget);
            }
            
            
            template <typename ValueType>
-            void solveInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver<ValueType> const& solver,
-                                            std::vector<ValueType>& x,
+            void getInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver<ValueType> const& solver,
                                            std::vector<ValueType> const& b,
                                            std::vector<storm::storage::sparse::state_type> const& policy,
                                            storm::storage::BitVector const& targetChoices,
-                                            ValueType const& prob0Value
+                                            storm::storage::SparseMatrix<ValueType>& inducedA,
+                                            std::vector<ValueType>& inducedB,
+                                            storm::storage::BitVector& probGreater0States
                                        ){
-                uint_fast64_t numberOfStates = x.size();
+                uint_fast64_t numberOfStates = solver.getMatrix().getRowGroupCount();
                
                //Get the matrix A, vector b, and the targetStates induced by the policy
-                storm::storage::SparseMatrix<ValueType> inducedA = solver.getMatrix().selectRowsFromRowGroups(policy, false);
-                std::vector<ValueType> inducedB(numberOfStates);
+                inducedA = solver.getMatrix().selectRowsFromRowGroups(policy, false);
+                inducedB = std::vector<ValueType>(numberOfStates);
                storm::utility::vector::selectVectorValues<ValueType>(inducedB, policy, solver.getMatrix().getRowGroupIndices(), b);
                storm::storage::BitVector inducedTarget(numberOfStates, false);
                for (uint_fast64_t state = 0; state < numberOfStates; ++state){
@ -127,18 +189,24 @@ namespace storm {
                        inducedTarget.set(state);
                    }
                }
-                
                //Find the states from which no target state is reachable.
-                //Note that depending on the policies, qualitative properties might have changed which makes this step necessary.
-                storm::storage::BitVector probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(numberOfStates, true), inducedTarget);
+                probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(numberOfStates, true), inducedTarget);
+            }
            
-                //Get the final A,x, and b and invoke linear equation solver
-                storm::storage::SparseMatrix<ValueType> subA = inducedA.getSubmatrix(true, probGreater0States, probGreater0States, true);
+            template<typename ValueType>
+            void solveLinearEquationSystem(storm::storage::SparseMatrix<ValueType>const& A,
+                                           std::vector<ValueType>& x,
+                                           std::vector<ValueType> const& b,
+                                           storm::storage::BitVector const& probGreater0States,
+                                           ValueType const& prob0Value
+                                        ){
+                //Get the submatrix/subvector A,x, and b and invoke linear equation solver
+                storm::storage::SparseMatrix<ValueType> subA = A.getSubmatrix(true, probGreater0States, probGreater0States, true);
                subA.convertToEquationSystem();
                std::vector<ValueType> subX(probGreater0States.getNumberOfSetBits());
                storm::utility::vector::selectVectorValues(subX, probGreater0States, x);
                std::vector<ValueType> subB(probGreater0States.getNumberOfSetBits());
-                storm::utility::vector::selectVectorValues(subB, probGreater0States, inducedB);
+                storm::utility::vector::selectVectorValues(subB, probGreater0States, b);
                std::unique_ptr<storm::solver::LinearEquationSolver<ValueType>> linEqSysSolver = storm::utility::solver::LinearEquationSolverFactory<ValueType>().create(subA);
                linEqSysSolver->solveEquationSystem(subX, subB);
            
@ -148,6 +216,150 @@ namespace storm {
            }
            
            
+            template <typename ValueType>
+            bool checkAndFixPolicy(storm::solver::GameSolver<ValueType> const& solver,
+                                                    std::vector<ValueType> const& x,
+                                                    std::vector<ValueType> const& b,
+                                                    std::vector<storm::storage::sparse::state_type>& player1Policy,
+                                                    std::vector<storm::storage::sparse::state_type>& player2Policy,
+                                                    storm::storage::BitVector const& targetChoices,
+                                                    storm::storage::SparseMatrix<ValueType>& inducedA,
+                                                    std::vector<ValueType>& inducedB,
+                                                    storm::storage::BitVector& probGreater0States
+                                        ){
+                if(probGreater0States.getNumberOfSetBits() == probGreater0States.size()) return true;
+                
+                bool policyChanged = true;
+                while(policyChanged){
+                    /*
+                     * Lets try to fix the issue by doing other choices that are equally good.
+                     * We change the policy in a state if the following conditions apply:
+                     * 1. The current choice does not lead to target
+                     * 2. There is another choice that leads to target
+                     * 3. The value of that choice is equal to the value of the choice given by the policy
+                     * Note that the values of the result will not change this way.
+                     * We do this unil the policy does not change anymore
+                     */
+                    policyChanged = false;
+                    //Player 1:
+                    for(uint_fast64_t pl1State=0; pl1State < player1Policy.size(); ++pl1State){
+                        uint_fast64_t pl1RowGroupIndex = solver.getPlayer1Matrix().getRowGroupIndices()[pl1State];
+                        //Check 1.: The current choice does not lead to target
+                        if(!probGreater0States.get(pl1State)){
+                            //1. Is satisfied. Check 2.: There is another choice that leads to target
+                            ValueType choiceValue = x[pl1State];
+                            for(uint_fast64_t otherChoice = 0; otherChoice < solver.getPlayer1Matrix().getRowGroupSize(pl1State); ++otherChoice){
+                                if(otherChoice == player1Policy[pl1State]) continue;
+                                //the otherChoice selects a player2 state in which player2 makes his choice (according to the player2Policy).
+                                uint_fast64_t pl2State = solver.getPlayer1Matrix().getRow(pl1RowGroupIndex + otherChoice).begin()->getColumn();
+                                uint_fast64_t pl2Row = solver.getPlayer2Matrix().getRowGroupIndices()[pl2State] + player2Policy[pl2State];
+                                if(rowLeadsToTarget(pl2Row, solver.getPlayer2Matrix(), targetChoices, probGreater0States)){
+                                    //2. is satisfied. Check 3. The value of that choice is equal to the value of the choice given by the policy
+                                    ValueType otherValue = solver.getPlayer2Matrix().multiplyRowWithVector(pl2Row, x) + b[pl2Row];
+                                    if(storm::utility::vector::equalModuloPrecision(choiceValue, otherValue, solver.getPrecision(), !solver.getRelative())){
+                                        //3. is satisfied.
+                                        player1Policy[pl1State] = otherChoice;
+                                        probGreater0States.set(pl1State);
+                                        policyChanged = true;
+                                        break; //no need to check other choices
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    //update probGreater0States
+                    probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(probGreater0States.size(), true), probGreater0States);
+                    //Player 2:
+                    for(uint_fast64_t pl2State=0; pl2State < player2Policy.size(); ++pl2State){
+                        uint_fast64_t pl2RowGroupIndex = solver.getPlayer2Matrix().getRowGroupIndices()[pl2State];
+                        //Check 1.: The current choice does not lead to target
+                        if(!rowLeadsToTarget(pl2RowGroupIndex + player2Policy[pl2State], solver.getPlayer2Matrix(), targetChoices, probGreater0States)){
+                            //1. Is satisfied. Check 2. There is another choice that leads to target
+                            ValueType choiceValue = solver.getPlayer2Matrix().multiplyRowWithVector(pl2RowGroupIndex + player2Policy[pl2State], x) + b[pl2RowGroupIndex + player2Policy[pl2State]];
+                            for(uint_fast64_t otherChoice = 0; otherChoice < solver.getPlayer2Matrix().getRowGroupSize(pl2State); ++otherChoice){
+                                if(otherChoice == player2Policy[pl2State]) continue;
+                                if(rowLeadsToTarget(pl2RowGroupIndex + otherChoice, solver.getPlayer2Matrix(), targetChoices, probGreater0States)){
+                                    //2. is satisfied. Check 3. The value of that choice is equal to the value of the choice given by the policy
+                                    ValueType otherValue = solver.getPlayer2Matrix().multiplyRowWithVector(pl2RowGroupIndex + otherChoice, x) + b[pl2RowGroupIndex + otherChoice];
+                                    if(storm::utility::vector::equalModuloPrecision(choiceValue, otherValue, solver.getPrecision(), solver.getRelative())){
+                                        //3. is satisfied.
+                                        player2Policy[pl2State] = otherChoice;
+                                        policyChanged = true;
+                                        break; //no need to check other choices
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    
+                    //update probGreater0States
+                    getInducedEquationSystem(solver, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States);
+                    if(probGreater0States.getNumberOfSetBits() == probGreater0States.size()){
+                        return true;
+                    }
+                }
+                //Reaching this point means that the policy does not change anymore and we could not fix it.
+                return false;
+            }
+            
+            template <typename ValueType>
+            bool checkAndFixPolicy(storm::solver::MinMaxLinearEquationSolver<ValueType> const& solver,
+                                    std::vector<ValueType> const& x,
+                                    std::vector<ValueType> const& b,
+                                    std::vector<storm::storage::sparse::state_type>& policy,
+                                    storm::storage::BitVector const& targetChoices,
+                                    storm::storage::SparseMatrix<ValueType>& inducedA,
+                                    std::vector<ValueType>& inducedB,
+                                    storm::storage::BitVector& probGreater0States
+                                ){
+                if(probGreater0States.getNumberOfSetBits() == probGreater0States.size()) return true;
+                
+                bool policyChanged = true;
+                while(policyChanged){
+                    /*
+                     * Lets try to fix the issue by doing other choices that are equally good.
+                     * We change the policy in a state if the following conditions apply:
+                     * 1. The current choice does not lead to target
+                     * 2. There is another choice that leads to target
+                     * 3. The value of that choice is equal to the value of the choice given by the policy
+                     * Note that the values of the result will not change this way.
+                     * We do this unil the policy does not change anymore
+                     */
+                    policyChanged = false;
+                    for(uint_fast64_t state=0; state < policy.size(); ++state){
+                        uint_fast64_t rowGroupIndex = solver.getMatrix().getRowGroupIndices()[state];
+                        //Check 1.: The current choice does not lead to target
+                        if(!probGreater0States.get(state)){
+                            //1. Is satisfied. Check 2.: There is another choice that leads to target
+                            ValueType choiceValue = x[state];
+                            for(uint_fast64_t otherChoice = 0; otherChoice < solver.getMatrix().getRowGroupSize(state); ++otherChoice){
+                                if(otherChoice == policy[state]) continue;
+                                if(rowLeadsToTarget(rowGroupIndex + otherChoice, solver.getMatrix(), targetChoices, probGreater0States)){
+                                    //2. is satisfied. Check 3. The value of that choice is equal to the value of the choice given by the policy
+                                    ValueType otherValue = solver.getMatrix().multiplyRowWithVector(rowGroupIndex + otherChoice, x) + b[rowGroupIndex + otherChoice];
+                                    if(storm::utility::vector::equalModuloPrecision(choiceValue, otherValue, solver.getPrecision(), !solver.getRelative())){
+                                        //3. is satisfied.
+                                        policy[state] = otherChoice;
+                                        probGreater0States.set(state);
+                                        policyChanged = true;
+                                        break; //no need to check other choices
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    
+                    //update probGreater0States and equation system
+                    getInducedEquationSystem(solver, b, policy, targetChoices, inducedA, inducedB, probGreater0States);
+                    if(probGreater0States.getNumberOfSetBits() == probGreater0States.size()){
+                        return true;
+                    }
+                }
+                //Reaching this point means that the policy does not change anymore and we could not fix it.
+                return false;
+            }
+            
+            
            template void solveGame<double>( storm::solver::GameSolver<double>& solver,
                                std::vector<double>& x,
                                std::vector<double> const& b,
@ -159,15 +371,6 @@ namespace storm {
                                double const& prob0Value
                            );
            
-            template void solveInducedEquationSystem<double>(storm::solver::GameSolver<double> const& solver,
-                                std::vector<double>& x,
-                                std::vector<double> const& b,
-                                std::vector<storm::storage::sparse::state_type> const& player1Policy, 
-                                std::vector<storm::storage::sparse::state_type> const& player2Policy,
-                                storm::storage::BitVector const& targetChoices,
-                                double const& prob0Value
-                            );
-            
            template void solveMinMaxLinearEquationSystem<double>( storm::solver::MinMaxLinearEquationSolver<double>& solver,
                                std::vector<double>& x,
                                std::vector<double> const& b,
@ -177,14 +380,53 @@ namespace storm {
                                double const& prob0Value
                            );
            
-            template void solveInducedEquationSystem<double>(storm::solver::MinMaxLinearEquationSolver<double> const& solver,
-                                std::vector<double>& x,
-                                std::vector<double> const& b,
-                                std::vector<storm::storage::sparse::state_type> const& policy,
-                                storm::storage::BitVector const& targetChoices,
-                                double const& prob0Value
+            template void getInducedEquationSystem<double>(storm::solver::GameSolver<double> const& solver,
+                                                    std::vector<double> const& b,
+                                                    std::vector<storm::storage::sparse::state_type> const& player1Policy,
+                                                    std::vector<storm::storage::sparse::state_type> const& player2Policy,
+                                                    storm::storage::BitVector const& targetChoices,
+                                                    storm::storage::SparseMatrix<double>& inducedA,
+                                                    std::vector<double>& inducedB,
+                                                    storm::storage::BitVector& probGreater0States
+                            );
+            
+            template void getInducedEquationSystem<double>(storm::solver::MinMaxLinearEquationSolver<double> const& solver,
+                                            std::vector<double> const& b,
+                                            std::vector<storm::storage::sparse::state_type> const& policy,
+                                            storm::storage::BitVector const& targetChoices,
+                                            storm::storage::SparseMatrix<double>& inducedA,
+                                            std::vector<double>& inducedB,
+                                            storm::storage::BitVector& probGreater0States
+                            );
+            
+            template void solveLinearEquationSystem<double>(storm::storage::SparseMatrix<double>const& A,
+                                           std::vector<double>& x,
+                                           std::vector<double> const& b,
+                                           storm::storage::BitVector const& probGreater0States,
+                                           double const& prob0Value
                            );
            
+            template bool checkAndFixPolicy<double>(storm::solver::GameSolver<double> const& solver,
+                                            std::vector<double> const& x,
+                                            std::vector<double> const& b,
+                                            std::vector<storm::storage::sparse::state_type>& player1Policy,
+                                            std::vector<storm::storage::sparse::state_type>& player2Policy,
+                                            storm::storage::BitVector const& targetChoices,
+                                            storm::storage::SparseMatrix<double>& inducedA,
+                                            std::vector<double>& inducedB,
+                                            storm::storage::BitVector& probGreater0States
+                                        );
+            
+            template bool checkAndFixPolicy<double>(storm::solver::MinMaxLinearEquationSolver<double> const& solver,
+                                    std::vector<double> const& x,
+                                    std::vector<double> const& b,
+                                    std::vector<storm::storage::sparse::state_type>& policy,
+                                    storm::storage::BitVector const& targetChoices,
+                                    storm::storage::SparseMatrix<double>& inducedA,
+                                    std::vector<double>& inducedB,
+                                    storm::storage::BitVector& probGreater0States
+                                );
+                
        }
    }
 }
--- a/src/utility/policyguessing.h
+++ b/src/utility/policyguessing.h
@ -18,6 +18,7 @@
 #include "src/utility/vector.h"
 #include "src/storage/BitVector.h"
 #include "src/storage/sparse/StateType.h"
+#include "src/storage/SparseMatrix.h"

 namespace storm {
    namespace utility{
@ -57,37 +58,6 @@ namespace storm {
                                    ValueType const& prob0Value
                                );
            
-            
-            /*!
-             * Solves the equation system defined by the matrix A and vector b' that result from applying
-             * the given policies to the matrices of the two players and the given b.
-             * 
-             * Note that, depending on the policies, the qualitative properties of the graph defined by A
-             * might be different to the original graph of the game.
-             * To ensure a unique solution, we need to filter out the "prob0"-states.
-             * (Notice that new "prob1"-states do not harm as actual target states are always excluded, independent of the choice)
-             *
-             * @param solver the solver that contains the two player matrices
-             * @param x The initial guess of the solution.
-             * @param b The vector in which to select the entries of the right hand side
-             * @param player1Policy A policy that selects rows in every rowgroup of player1.
-             * @param player2Policy A policy that selects rows in every rowgroup of player2.
-             * @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state
-             * @param prob0Value the value that is assigned to the states that have probability zero to reach a target
-             * @return The solution vector in the form of the vector x.
-             */
-            template<typename ValueType>
-            void solveInducedEquationSystem(storm::solver::GameSolver<ValueType> const& solver,
-                                                    std::vector<ValueType>& x,
-                                                    std::vector<ValueType> const& b,
-                                                    std::vector<storm::storage::sparse::state_type> const& player1Policy,
-                                                    std::vector<storm::storage::sparse::state_type> const& player2Policy,
-                                                    storm::storage::BitVector const& targetChoices,
-                                                    ValueType const& prob0Value
-                                                );
-            
-            
-            
            /*!
             * invokes the given MinMaxLinearEquationSolver.
             * 
@ -100,7 +70,7 @@ namespace storm {
             * To identify these states and set the result for them correctly, it is necessary to know whether rewards or probabilities are to be computed
             *

-             * @param solver the solver that contains the two player matrices
+             * @param solver the solver that contains the matrix
             * @param x The initial guess of the solution.
             * @param b The vector to add after matrix-vector multiplication.
             * @param goal Sets whether we want to minimize or maximize.
@ -120,31 +90,151 @@ namespace storm {
                                );
            
            /*!
-             * Solves the equation system defined by the matrix A and vector b' that result from applying
-             * the given policy to the matrices of the two players and the given b.
+             * Constructs the equation system defined by the matrix inducedA and vector inducedB that result from applying
+             * the given policies to the matrices of the two players and the given b.
             * 
-             * Note that, depending on the policy, the qualitative properties of the graph defined by A
-             * might be different to the original graph
-             * To ensure a unique solution, we need to filter out the "prob0"-states.
-             * (Notice that new "prob1"-states do not harm as actual target states are always excluded, independent of the choice)
+             * Note that, depending on the policies, the qualitative properties of the graph defined by inducedA
+             * might be different to the original graph.
             * 
-             * @param solverthe solver that contains the two player matrices
-             * @param x The initial guess of the solution.
+             * @param solver the solver that contains the two player matrices
+             * @param b The vector in which to select the entries of the right hand side
+             * @param player1Policy A policy that selects rows in every rowgroup of player1.
+             * @param player2Policy A policy that selects rows in every rowgroup of player2.
+             * @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state
+             * @param inducedA the Matrix for the resulting equation system
+             * @param inducedB the Vector for the resulting equation system
+             * @param probGreater0States marks the  states which have a positive probability to lead to a target state
+             * @return Induced A, b and targets
+             */
+            template<typename ValueType>
+            void getInducedEquationSystem(storm::solver::GameSolver<ValueType> const& solver,
+                                                    std::vector<ValueType> const& b,
+                                                    std::vector<storm::storage::sparse::state_type> const& player1Policy,
+                                                    std::vector<storm::storage::sparse::state_type> const& player2Policy,
+                                                    storm::storage::BitVector const& targetChoices,
+                                                    storm::storage::SparseMatrix<ValueType>& inducedA,
+                                                    std::vector<ValueType>& inducedB,
+                                                    storm::storage::BitVector& probGreater0States
+                                                );
+            
+            /*!
+             * Constructs the equation system defined by the matrix inducedA and vector inducedB that result from applying
+             * the given policy to the matrix from the given solver and the given b.
+             * 
+             * Note that, depending on the policies, the qualitative properties of the graph defined by inducedA
+             * might be different to the original graph.
+             * 
+             * @param solver the solver that contains the matrix
             * @param b The vector in which to select the entries of the right hand side
             * @param policy A policy that selects rows in every rowgroup.
-             * @param targetChoices marks the rows in the matrix that have a positive probability to lead to a target state
-             * @param prob0Value the value that is assigned to the states that have probability zero to reach a target
-             * @return The solution vector in the form of the vector x.
+             * @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state
+             * @param inducedA the Matrix for the resulting equation system
+             * @param inducedB the Vector for the resulting equation system
+             * @param probGreater0States marks the states which have a positive probability to lead to a target state
+             * @return Induced A, b and targets
             */
            template<typename ValueType>
-            void solveInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver<ValueType> const& solver,
-                                            std::vector<ValueType>& x,
+            void getInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver<ValueType> const& solver,
                                            std::vector<ValueType> const& b,
                                            std::vector<storm::storage::sparse::state_type> const& policy,
                                            storm::storage::BitVector const& targetChoices,
-                                            ValueType const& prob0Value
+                                            storm::storage::SparseMatrix<ValueType>& inducedA,
+                                            std::vector<ValueType>& inducedB,
+                                            storm::storage::BitVector& probGreater0States
                                        );
            
+            /*!
+             * Solves the given equation system.
+             * 
+             * It is not assumed that qualitative properties of the Graph defined by A have been checked, yet.
+             * However, actual target states are already filtered out.
+             * To ensure a unique solution, we also need to filter out the "prob0"-states.
+             *
+             * @param A the matrix of the equation system
+             * @param x The initial guess of the solution.
+             * @param b The vector of the equation system
+             * @param targetChoices marks the rows in the matrix that have a positive probability to lead to a target state
+             * @param prob0Value the value that is assigned to the states that have probability zero to reach a target
+             * @return The solution vector in the form of the vector x.
+             */
+            template<typename ValueType>
+            void solveLinearEquationSystem(storm::storage::SparseMatrix<ValueType>const& A,
+                                           std::vector<ValueType>& x,
+                                           std::vector<ValueType> const& b,
+                                           storm::storage::BitVector const& probGreater0States,
+                                           ValueType const& prob0Value
+                                        );
+            
+            /*!
+             * Checks if the given policies make choices that lead to states from which no target state is reachable ("prob0"-states). 
+             * This can happen when value iteration is applied and there are multiple choices with the same value
+             * (e.g. a state that allows to chose a selfloop with probability one)
+             * 
+             * If the policies are changed, they are updated accordingly (as well as the given inducedA, inducedB and probGreater0States)
+             * 
+             * @param solver the solver that contains the two player matrices
+             * @param x the solution vector (the result from value iteration)
+             * @param b The vector in which to select the entries of the right hand side
+             * @param player1Policy A policy that selects rows in every rowgroup of player1.
+             * @param player2Policy A policy that selects rows in every rowgroup of player2.
+             * @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state
+             * @param inducedA the Matrix for the equation system
+             * @param inducedB the Vector for the equation system
+             * @param probGreater0States marks the  states which have a positive probability to lead to a target state
+             * @return true iff there are no more prob0-states. Also changes the given policies accordingly
+             */
+            template<typename ValueType>
+            bool checkAndFixPolicy(storm::solver::GameSolver<ValueType> const& solver,
+                                                    std::vector<ValueType> const& x,
+                                                    std::vector<ValueType> const& b,
+                                                    std::vector<storm::storage::sparse::state_type>& player1Policy,
+                                                    std::vector<storm::storage::sparse::state_type>& player2Policy,
+                                                    storm::storage::BitVector const& targetChoices,
+                                                    storm::storage::SparseMatrix<ValueType>& inducedA,
+                                                    std::vector<ValueType>& inducedB,
+                                                    storm::storage::BitVector& probGreater0States
+                                                );
+            
+            /*!
+             * Checks if the given policies make choices that lead to states from which no target state is reachable ("prob0"-states). 
+             * This can happen when value iteration is applied and there are multiple choices with the same value
+             * (e.g. a state that allows to chose a selfloop with probability one)
+             * 
+             * If the policies are changed, they are updated accordingly (as well as the given inducedA, inducedB and probGreater0States)
+             * 
+             * @param solver the solver that contains the two player matrices
+             * @param x the solution vector (the result from value iteration)
+             * @param b The vector in which to select the entries of the right hand side
+             * @param policy A policy that selects rows in every rowgroup.
+             * @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state
+             * @param inducedA the Matrix for the equation system
+             * @param inducedB the Vector for the equation system
+             * @param probGreater0States marks the  states which have a positive probability to lead to a target state
+             * @return true iff there are no more prob0-states. Also changes the given policies accordingly
+             */
+            template<typename ValueType>
+            bool checkAndFixPolicy(storm::solver::MinMaxLinearEquationSolver<ValueType> const& solver,
+                                    std::vector<ValueType> const& x,
+                                    std::vector<ValueType> const& b,
+                                    std::vector<storm::storage::sparse::state_type>& policy,
+                                    storm::storage::BitVector const& targetChoices,
+                                    storm::storage::SparseMatrix<ValueType>& inducedA,
+                                    std::vector<ValueType>& inducedB,
+                                    storm::storage::BitVector& probGreater0States
+                                );
+            
+            //little helper function
+            template<typename ValueType>
+            bool rowLeadsToTarget(uint_fast64_t row,
+                                  storm::storage::SparseMatrix<ValueType> const& matrix,
+                                  storm::storage::BitVector const& targetChoices,
+                                  storm::storage::BitVector const& probGreater0States){
+                if(targetChoices.get(row)) return true;
+                for(auto const& successor : matrix.getRow(row)){
+                    if(probGreater0States.get(successor.getColumn())) return true;
+                }
+                return false;
+            }
        }
    }
 }