diff --git a/src/modelchecker/region/ApproximationModel.cpp b/src/modelchecker/region/ApproximationModel.cpp index e28b82901..9da24bdf7 100644 --- a/src/modelchecker/region/ApproximationModel.cpp +++ b/src/modelchecker/region/ApproximationModel.cpp @@ -67,13 +67,13 @@ namespace storm { this->vectorData.assignment.shrink_to_fit(); if(parametricModel.getType()==storm::models::ModelType::Mdp){ initializePlayer1Matrix(parametricModel); + this->solverData.lastPlayer1Policy = Policy(this->solverData.player1Matrix.getRowGroupCount(), 0); } this->solverData.result = std::vector(maybeStates.getNumberOfSetBits(), this->computeRewards ? storm::utility::one() : ConstantType(0.5)); this->solverData.initialStateIndex = newIndices[initialState]; this->solverData.lastMinimizingPolicy = Policy(this->matrixData.matrix.getRowGroupCount(), 0); this->solverData.lastMaximizingPolicy = Policy(this->matrixData.matrix.getRowGroupCount(), 0); - this->solverData.lastPlayer1Policy = Policy(this->matrixData.matrix.getRowGroupCount(), 0); } template @@ -325,9 +325,9 @@ namespace storm { } // std::cout << "Used Approximation" << std::endl; for (auto const& varcount : VarCount){ - if(varcount.second.first > 0 && varcount.second.second > 0){ - // std::cout << " Variable " << varcount.first << " has been set to lower " << varcount.second.first << " times and to upper " << varcount.second.second << " times. (total: " << substitutionCount << ")" << std::endl; - } + // if(varcount.second.first > 0 && varcount.second.second > 0){ + // std::cout << " Variable " << varcount.first << " has been set to lower " << varcount.second.first << " times and to upper " << varcount.second.second << " times. (total: " << substitutionCount << ")" << std::endl; + // } } for (auto const& fixVar : fixedVariables){ //std::cout << " APPROXMODEL: variable " << fixVar.first << " is always mapped to " << fixVar.second << std::endl; diff --git a/src/solver/AbstractGameSolver.cpp b/src/solver/AbstractGameSolver.cpp index 56f6e0974..243a98a46 100644 --- a/src/solver/AbstractGameSolver.cpp +++ b/src/solver/AbstractGameSolver.cpp @@ -32,5 +32,14 @@ namespace storm { assert(!this->player2Policy.empty()); return player2Policy; } + + double AbstractGameSolver::getPrecision() const { + return precision; + } + + bool AbstractGameSolver::getRelative() const { + return relative; + } + } } \ No newline at end of file diff --git a/src/solver/AbstractGameSolver.h b/src/solver/AbstractGameSolver.h index 05f420a80..927b6ab8f 100644 --- a/src/solver/AbstractGameSolver.h +++ b/src/solver/AbstractGameSolver.h @@ -30,6 +30,8 @@ namespace storm { std::vector getPlayer1Policy() const; std::vector getPlayer2Policy() const; + double getPrecision() const; + bool getRelative() const; protected: // The precision to achieve. diff --git a/src/solver/MinMaxLinearEquationSolver.cpp b/src/solver/MinMaxLinearEquationSolver.cpp index 7df50cfff..c01e7a724 100644 --- a/src/solver/MinMaxLinearEquationSolver.cpp +++ b/src/solver/MinMaxLinearEquationSolver.cpp @@ -27,5 +27,14 @@ namespace storm { assert(!policy.empty()); return policy; } + + double AbstractMinMaxLinearEquationSolver::getPrecision() const { + return precision; + } + + bool AbstractMinMaxLinearEquationSolver::getRelative() const { + return relative; + } + } } diff --git a/src/solver/MinMaxLinearEquationSolver.h b/src/solver/MinMaxLinearEquationSolver.h index 7d3bbe287..d294b8fb3 100644 --- a/src/solver/MinMaxLinearEquationSolver.h +++ b/src/solver/MinMaxLinearEquationSolver.h @@ -27,6 +27,9 @@ namespace storm { std::vector getPolicy() const; + double getPrecision() const; + bool getRelative() const; + void setOptimizationDirection(OptimizationDirection d) { direction = convert(d); } diff --git a/src/storage/SparseMatrix.cpp b/src/storage/SparseMatrix.cpp index ba2ff746a..332641ceb 100644 --- a/src/storage/SparseMatrix.cpp +++ b/src/storage/SparseMatrix.cpp @@ -999,6 +999,15 @@ namespace storm { } #endif + template + ValueType SparseMatrix::multiplyRowWithVector(index_type row, std::vector const& vector) const { + ValueType result = storm::utility::zero(); + for(auto const& entry : this->getRow(row)){ + result += entry.getValue() * vector[entry.getColumn()]; + } + return result; + } + template void SparseMatrix::performSuccessiveOverRelaxationStep(ValueType omega, std::vector& x, std::vector const& b) const { const_iterator it = this->begin(); diff --git a/src/storage/SparseMatrix.h b/src/storage/SparseMatrix.h index 68417d22d..c2c3b9005 100644 --- a/src/storage/SparseMatrix.h +++ b/src/storage/SparseMatrix.h @@ -705,6 +705,15 @@ namespace storm { * @return The product of the matrix and the given vector as the content of the given result vector. */ void multiplyWithVector(std::vector const& vector, std::vector& result) const; + + /*! + * Multiplies a single row of the matrix with the given vector and returns the result + * + * @param row The index of the row with which to multiply + * @param vector The vector with which to multiply the row. + * @return the result of the multiplication. + */ + value_type multiplyRowWithVector(index_type row, std::vector const& vector) const; /*! * Multiplies the vector to the matrix from the left and writes the result to the given result vector. diff --git a/src/utility/policyguessing.cpp b/src/utility/policyguessing.cpp index 69ad63b19..cfd471111 100644 --- a/src/utility/policyguessing.cpp +++ b/src/utility/policyguessing.cpp @@ -8,12 +8,11 @@ #include "src/utility/policyguessing.h" -#include "src/storage/SparseMatrix.h" #include "src/utility/macros.h" #include "src/utility/solver.h" #include "src/solver/LinearEquationSolver.h" -#include "src/solver/GameSolver.h" #include "graph.h" +#include "ConstantsComparator.h" namespace storm { namespace utility{ @@ -29,25 +28,121 @@ namespace storm { std::vector& player2Policy, storm::storage::BitVector const& targetChoices, ValueType const& prob0Value - ){ + ){ + + // std::vector pl1Policy = player1Policy; + // std::vector pl2Policy = player2Policy; + storm::storage::SparseMatrix inducedA; + std::vector inducedB; + storm::storage::BitVector probGreater0States; + getInducedEquationSystem(solver, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States); + + solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value); - solveInducedEquationSystem(solver, x, b, player1Policy, player2Policy, targetChoices, prob0Value); solver.setPolicyTracking(); - solver.solveGame(player1Goal, player2Goal, x, b); - player1Policy = solver.getPlayer1Policy(); - player2Policy = solver.getPlayer2Policy(); + bool resultCorrect = false; + while(!resultCorrect){ + solver.solveGame(player1Goal, player2Goal, x, b); + player1Policy = solver.getPlayer1Policy(); + player2Policy = solver.getPlayer2Policy(); + + //Check if the policies makes choices that lead to states from which no target state is reachable ("prob0"-states). + getInducedEquationSystem(solver, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States); + resultCorrect = checkAndFixPolicy(solver, x, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States); + if(!resultCorrect){ + //If the policy could not be fixed, it indicates that our guessed values were to high. + STORM_LOG_WARN("Policies could not be fixed. Restarting Gamesolver. "); + solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value); + //x = std::vector(x.size(), storm::utility::zero()); + } + } + /* + std::size_t p2Precount=0; + std::size_t p2Postcount=0; + std::size_t p1diff =0; + std::size_t p2diff =0; + std::size_t p2RelevantCount=0; + storm::storage::BitVector relevantP2Groups(pl2Policy.size(),false); + for(std::size_t i = 0; igetColumn()); + } + for (std::size_t i : relevantP2Groups){ + if(pl2Policy[i] != player2Policy[i]){ + ++p2RelevantCount; + } + } + for(std::size_t i = 0; i + void solveMinMaxLinearEquationSystem( storm::solver::MinMaxLinearEquationSolver& solver, + std::vector& x, + std::vector const& b, + OptimizationDirection goal, + std::vector& policy, + storm::storage::BitVector const& targetChoices, + ValueType const& prob0Value + ){ + storm::storage::SparseMatrix inducedA; + std::vector inducedB; + storm::storage::BitVector probGreater0States; + getInducedEquationSystem(solver, b, policy, targetChoices, inducedA, inducedB, probGreater0States); + solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value); + + solver.setPolicyTracking(); + bool resultCorrect = false; + while(!resultCorrect){ + solver.solveEquationSystem(goal, x, b); + policy = solver.getPolicy(); + + //Check if the policy makes choices that lead to states from which no target state is reachable ("prob0"-states). + getInducedEquationSystem(solver, b, policy, targetChoices, inducedA, inducedB, probGreater0States); + resultCorrect = checkAndFixPolicy(solver, x, b, policy, targetChoices, inducedA, inducedB, probGreater0States); + + if(!resultCorrect){ + //If the policy could not be fixed, it indicates that our guessed values were to high. + STORM_LOG_WARN("Policy could not be fixed. Restarting MinMaxsolver." ); + solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value); + //x = std::vector(x.size(), storm::utility::zero()); + } + } + + + } + + template - void solveInducedEquationSystem(storm::solver::GameSolver const& solver, - std::vector& x, + void getInducedEquationSystem(storm::solver::GameSolver const& solver, std::vector const& b, std::vector const& player1Policy, std::vector const& player2Policy, storm::storage::BitVector const& targetChoices, - ValueType const& prob0Value){ - uint_fast64_t numberOfPlayer1States = x.size(); + storm::storage::SparseMatrix& inducedA, + std::vector& inducedB, + storm::storage::BitVector& probGreater0States + ){ + uint_fast64_t numberOfPlayer1States = solver.getPlayer1Matrix().getRowGroupCount(); //Get the rows of the player2matrix that are selected by the policies //Note that rows can be selected more then once and in an arbitrary order. @@ -58,10 +153,9 @@ namespace storm { uint_fast64_t pl2State = pl1Row.begin()->getColumn(); selectedRows[pl1State] = solver.getPlayer2Matrix().getRowGroupIndices()[pl2State] + player2Policy[pl2State]; } - //Get the matrix A, vector b, and the targetStates induced by this selection - storm::storage::SparseMatrix inducedA = solver.getPlayer2Matrix().selectRowsFromRowIndexSequence(selectedRows, false); - std::vector inducedB(numberOfPlayer1States); + inducedA = solver.getPlayer2Matrix().selectRowsFromRowIndexSequence(selectedRows, false); + inducedB = std::vector(numberOfPlayer1States); storm::utility::vector::selectVectorValues(inducedB, selectedRows, b); storm::storage::BitVector inducedTarget(numberOfPlayer1States, false); for (uint_fast64_t pl1State = 0; pl1State < numberOfPlayer1States; ++pl1State){ @@ -69,57 +163,25 @@ namespace storm { inducedTarget.set(pl1State); } } - //Find the states from which no target state is reachable. - //Note that depending on the policies, qualitative properties might have changed which makes this step necessary. - storm::storage::BitVector probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(numberOfPlayer1States, true), inducedTarget); - - //Get the final A,x, and b and invoke linear equation solver - storm::storage::SparseMatrix subA = inducedA.getSubmatrix(true, probGreater0States, probGreater0States, true); - subA.convertToEquationSystem(); - std::vector subX(probGreater0States.getNumberOfSetBits()); - storm::utility::vector::selectVectorValues(subX, probGreater0States, x); - std::vector subB(probGreater0States.getNumberOfSetBits()); - storm::utility::vector::selectVectorValues(subB, probGreater0States, inducedB); - std::unique_ptr> linEqSysSolver = storm::utility::solver::LinearEquationSolverFactory().create(subA); - linEqSysSolver->solveEquationSystem(subX, subB); - - //fill in the result - storm::utility::vector::setVectorValues(x, probGreater0States, subX); - storm::utility::vector::setVectorValues(x, (~probGreater0States), prob0Value); + probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(numberOfPlayer1States, true), inducedTarget); } template - void solveMinMaxLinearEquationSystem( storm::solver::MinMaxLinearEquationSolver& solver, - std::vector& x, - std::vector const& b, - OptimizationDirection goal, - std::vector& policy, - storm::storage::BitVector const& targetChoices, - ValueType const& prob0Value - ){ - - solveInducedEquationSystem(solver, x, b, policy, targetChoices, prob0Value); - solver.setPolicyTracking(); - solver.solveEquationSystem(goal, x, b); - policy = solver.getPolicy(); - } - - - template - void solveInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver const& solver, - std::vector& x, + void getInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver const& solver, std::vector const& b, std::vector const& policy, storm::storage::BitVector const& targetChoices, - ValueType const& prob0Value + storm::storage::SparseMatrix& inducedA, + std::vector& inducedB, + storm::storage::BitVector& probGreater0States ){ - uint_fast64_t numberOfStates = x.size(); + uint_fast64_t numberOfStates = solver.getMatrix().getRowGroupCount(); //Get the matrix A, vector b, and the targetStates induced by the policy - storm::storage::SparseMatrix inducedA = solver.getMatrix().selectRowsFromRowGroups(policy, false); - std::vector inducedB(numberOfStates); + inducedA = solver.getMatrix().selectRowsFromRowGroups(policy, false); + inducedB = std::vector(numberOfStates); storm::utility::vector::selectVectorValues(inducedB, policy, solver.getMatrix().getRowGroupIndices(), b); storm::storage::BitVector inducedTarget(numberOfStates, false); for (uint_fast64_t state = 0; state < numberOfStates; ++state){ @@ -127,18 +189,24 @@ namespace storm { inducedTarget.set(state); } } - //Find the states from which no target state is reachable. - //Note that depending on the policies, qualitative properties might have changed which makes this step necessary. - storm::storage::BitVector probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(numberOfStates, true), inducedTarget); - - //Get the final A,x, and b and invoke linear equation solver - storm::storage::SparseMatrix subA = inducedA.getSubmatrix(true, probGreater0States, probGreater0States, true); + probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(numberOfStates, true), inducedTarget); + } + + template + void solveLinearEquationSystem(storm::storage::SparseMatrixconst& A, + std::vector& x, + std::vector const& b, + storm::storage::BitVector const& probGreater0States, + ValueType const& prob0Value + ){ + //Get the submatrix/subvector A,x, and b and invoke linear equation solver + storm::storage::SparseMatrix subA = A.getSubmatrix(true, probGreater0States, probGreater0States, true); subA.convertToEquationSystem(); std::vector subX(probGreater0States.getNumberOfSetBits()); storm::utility::vector::selectVectorValues(subX, probGreater0States, x); std::vector subB(probGreater0States.getNumberOfSetBits()); - storm::utility::vector::selectVectorValues(subB, probGreater0States, inducedB); + storm::utility::vector::selectVectorValues(subB, probGreater0States, b); std::unique_ptr> linEqSysSolver = storm::utility::solver::LinearEquationSolverFactory().create(subA); linEqSysSolver->solveEquationSystem(subX, subB); @@ -148,6 +216,150 @@ namespace storm { } + template + bool checkAndFixPolicy(storm::solver::GameSolver const& solver, + std::vector const& x, + std::vector const& b, + std::vector& player1Policy, + std::vector& player2Policy, + storm::storage::BitVector const& targetChoices, + storm::storage::SparseMatrix& inducedA, + std::vector& inducedB, + storm::storage::BitVector& probGreater0States + ){ + if(probGreater0States.getNumberOfSetBits() == probGreater0States.size()) return true; + + bool policyChanged = true; + while(policyChanged){ + /* + * Lets try to fix the issue by doing other choices that are equally good. + * We change the policy in a state if the following conditions apply: + * 1. The current choice does not lead to target + * 2. There is another choice that leads to target + * 3. The value of that choice is equal to the value of the choice given by the policy + * Note that the values of the result will not change this way. + * We do this unil the policy does not change anymore + */ + policyChanged = false; + //Player 1: + for(uint_fast64_t pl1State=0; pl1State < player1Policy.size(); ++pl1State){ + uint_fast64_t pl1RowGroupIndex = solver.getPlayer1Matrix().getRowGroupIndices()[pl1State]; + //Check 1.: The current choice does not lead to target + if(!probGreater0States.get(pl1State)){ + //1. Is satisfied. Check 2.: There is another choice that leads to target + ValueType choiceValue = x[pl1State]; + for(uint_fast64_t otherChoice = 0; otherChoice < solver.getPlayer1Matrix().getRowGroupSize(pl1State); ++otherChoice){ + if(otherChoice == player1Policy[pl1State]) continue; + //the otherChoice selects a player2 state in which player2 makes his choice (according to the player2Policy). + uint_fast64_t pl2State = solver.getPlayer1Matrix().getRow(pl1RowGroupIndex + otherChoice).begin()->getColumn(); + uint_fast64_t pl2Row = solver.getPlayer2Matrix().getRowGroupIndices()[pl2State] + player2Policy[pl2State]; + if(rowLeadsToTarget(pl2Row, solver.getPlayer2Matrix(), targetChoices, probGreater0States)){ + //2. is satisfied. Check 3. The value of that choice is equal to the value of the choice given by the policy + ValueType otherValue = solver.getPlayer2Matrix().multiplyRowWithVector(pl2Row, x) + b[pl2Row]; + if(storm::utility::vector::equalModuloPrecision(choiceValue, otherValue, solver.getPrecision(), !solver.getRelative())){ + //3. is satisfied. + player1Policy[pl1State] = otherChoice; + probGreater0States.set(pl1State); + policyChanged = true; + break; //no need to check other choices + } + } + } + } + } + //update probGreater0States + probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(probGreater0States.size(), true), probGreater0States); + //Player 2: + for(uint_fast64_t pl2State=0; pl2State < player2Policy.size(); ++pl2State){ + uint_fast64_t pl2RowGroupIndex = solver.getPlayer2Matrix().getRowGroupIndices()[pl2State]; + //Check 1.: The current choice does not lead to target + if(!rowLeadsToTarget(pl2RowGroupIndex + player2Policy[pl2State], solver.getPlayer2Matrix(), targetChoices, probGreater0States)){ + //1. Is satisfied. Check 2. There is another choice that leads to target + ValueType choiceValue = solver.getPlayer2Matrix().multiplyRowWithVector(pl2RowGroupIndex + player2Policy[pl2State], x) + b[pl2RowGroupIndex + player2Policy[pl2State]]; + for(uint_fast64_t otherChoice = 0; otherChoice < solver.getPlayer2Matrix().getRowGroupSize(pl2State); ++otherChoice){ + if(otherChoice == player2Policy[pl2State]) continue; + if(rowLeadsToTarget(pl2RowGroupIndex + otherChoice, solver.getPlayer2Matrix(), targetChoices, probGreater0States)){ + //2. is satisfied. Check 3. The value of that choice is equal to the value of the choice given by the policy + ValueType otherValue = solver.getPlayer2Matrix().multiplyRowWithVector(pl2RowGroupIndex + otherChoice, x) + b[pl2RowGroupIndex + otherChoice]; + if(storm::utility::vector::equalModuloPrecision(choiceValue, otherValue, solver.getPrecision(), solver.getRelative())){ + //3. is satisfied. + player2Policy[pl2State] = otherChoice; + policyChanged = true; + break; //no need to check other choices + } + } + } + } + } + + //update probGreater0States + getInducedEquationSystem(solver, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States); + if(probGreater0States.getNumberOfSetBits() == probGreater0States.size()){ + return true; + } + } + //Reaching this point means that the policy does not change anymore and we could not fix it. + return false; + } + + template + bool checkAndFixPolicy(storm::solver::MinMaxLinearEquationSolver const& solver, + std::vector const& x, + std::vector const& b, + std::vector& policy, + storm::storage::BitVector const& targetChoices, + storm::storage::SparseMatrix& inducedA, + std::vector& inducedB, + storm::storage::BitVector& probGreater0States + ){ + if(probGreater0States.getNumberOfSetBits() == probGreater0States.size()) return true; + + bool policyChanged = true; + while(policyChanged){ + /* + * Lets try to fix the issue by doing other choices that are equally good. + * We change the policy in a state if the following conditions apply: + * 1. The current choice does not lead to target + * 2. There is another choice that leads to target + * 3. The value of that choice is equal to the value of the choice given by the policy + * Note that the values of the result will not change this way. + * We do this unil the policy does not change anymore + */ + policyChanged = false; + for(uint_fast64_t state=0; state < policy.size(); ++state){ + uint_fast64_t rowGroupIndex = solver.getMatrix().getRowGroupIndices()[state]; + //Check 1.: The current choice does not lead to target + if(!probGreater0States.get(state)){ + //1. Is satisfied. Check 2.: There is another choice that leads to target + ValueType choiceValue = x[state]; + for(uint_fast64_t otherChoice = 0; otherChoice < solver.getMatrix().getRowGroupSize(state); ++otherChoice){ + if(otherChoice == policy[state]) continue; + if(rowLeadsToTarget(rowGroupIndex + otherChoice, solver.getMatrix(), targetChoices, probGreater0States)){ + //2. is satisfied. Check 3. The value of that choice is equal to the value of the choice given by the policy + ValueType otherValue = solver.getMatrix().multiplyRowWithVector(rowGroupIndex + otherChoice, x) + b[rowGroupIndex + otherChoice]; + if(storm::utility::vector::equalModuloPrecision(choiceValue, otherValue, solver.getPrecision(), !solver.getRelative())){ + //3. is satisfied. + policy[state] = otherChoice; + probGreater0States.set(state); + policyChanged = true; + break; //no need to check other choices + } + } + } + } + } + + //update probGreater0States and equation system + getInducedEquationSystem(solver, b, policy, targetChoices, inducedA, inducedB, probGreater0States); + if(probGreater0States.getNumberOfSetBits() == probGreater0States.size()){ + return true; + } + } + //Reaching this point means that the policy does not change anymore and we could not fix it. + return false; + } + + template void solveGame( storm::solver::GameSolver& solver, std::vector& x, std::vector const& b, @@ -159,15 +371,6 @@ namespace storm { double const& prob0Value ); - template void solveInducedEquationSystem(storm::solver::GameSolver const& solver, - std::vector& x, - std::vector const& b, - std::vector const& player1Policy, - std::vector const& player2Policy, - storm::storage::BitVector const& targetChoices, - double const& prob0Value - ); - template void solveMinMaxLinearEquationSystem( storm::solver::MinMaxLinearEquationSolver& solver, std::vector& x, std::vector const& b, @@ -177,13 +380,52 @@ namespace storm { double const& prob0Value ); - template void solveInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver const& solver, - std::vector& x, - std::vector const& b, - std::vector const& policy, - storm::storage::BitVector const& targetChoices, - double const& prob0Value + template void getInducedEquationSystem(storm::solver::GameSolver const& solver, + std::vector const& b, + std::vector const& player1Policy, + std::vector const& player2Policy, + storm::storage::BitVector const& targetChoices, + storm::storage::SparseMatrix& inducedA, + std::vector& inducedB, + storm::storage::BitVector& probGreater0States + ); + + template void getInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver const& solver, + std::vector const& b, + std::vector const& policy, + storm::storage::BitVector const& targetChoices, + storm::storage::SparseMatrix& inducedA, + std::vector& inducedB, + storm::storage::BitVector& probGreater0States + ); + + template void solveLinearEquationSystem(storm::storage::SparseMatrixconst& A, + std::vector& x, + std::vector const& b, + storm::storage::BitVector const& probGreater0States, + double const& prob0Value ); + + template bool checkAndFixPolicy(storm::solver::GameSolver const& solver, + std::vector const& x, + std::vector const& b, + std::vector& player1Policy, + std::vector& player2Policy, + storm::storage::BitVector const& targetChoices, + storm::storage::SparseMatrix& inducedA, + std::vector& inducedB, + storm::storage::BitVector& probGreater0States + ); + + template bool checkAndFixPolicy(storm::solver::MinMaxLinearEquationSolver const& solver, + std::vector const& x, + std::vector const& b, + std::vector& policy, + storm::storage::BitVector const& targetChoices, + storm::storage::SparseMatrix& inducedA, + std::vector& inducedB, + storm::storage::BitVector& probGreater0States + ); } } diff --git a/src/utility/policyguessing.h b/src/utility/policyguessing.h index a66cb19ca..5c67c4271 100644 --- a/src/utility/policyguessing.h +++ b/src/utility/policyguessing.h @@ -18,6 +18,7 @@ #include "src/utility/vector.h" #include "src/storage/BitVector.h" #include "src/storage/sparse/StateType.h" +#include "src/storage/SparseMatrix.h" namespace storm { namespace utility{ @@ -57,37 +58,6 @@ namespace storm { ValueType const& prob0Value ); - - /*! - * Solves the equation system defined by the matrix A and vector b' that result from applying - * the given policies to the matrices of the two players and the given b. - * - * Note that, depending on the policies, the qualitative properties of the graph defined by A - * might be different to the original graph of the game. - * To ensure a unique solution, we need to filter out the "prob0"-states. - * (Notice that new "prob1"-states do not harm as actual target states are always excluded, independent of the choice) - * - * @param solver the solver that contains the two player matrices - * @param x The initial guess of the solution. - * @param b The vector in which to select the entries of the right hand side - * @param player1Policy A policy that selects rows in every rowgroup of player1. - * @param player2Policy A policy that selects rows in every rowgroup of player2. - * @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state - * @param prob0Value the value that is assigned to the states that have probability zero to reach a target - * @return The solution vector in the form of the vector x. - */ - template - void solveInducedEquationSystem(storm::solver::GameSolver const& solver, - std::vector& x, - std::vector const& b, - std::vector const& player1Policy, - std::vector const& player2Policy, - storm::storage::BitVector const& targetChoices, - ValueType const& prob0Value - ); - - - /*! * invokes the given MinMaxLinearEquationSolver. * @@ -100,7 +70,7 @@ namespace storm { * To identify these states and set the result for them correctly, it is necessary to know whether rewards or probabilities are to be computed * - * @param solver the solver that contains the two player matrices + * @param solver the solver that contains the matrix * @param x The initial guess of the solution. * @param b The vector to add after matrix-vector multiplication. * @param goal Sets whether we want to minimize or maximize. @@ -120,31 +90,151 @@ namespace storm { ); /*! - * Solves the equation system defined by the matrix A and vector b' that result from applying - * the given policy to the matrices of the two players and the given b. + * Constructs the equation system defined by the matrix inducedA and vector inducedB that result from applying + * the given policies to the matrices of the two players and the given b. * - * Note that, depending on the policy, the qualitative properties of the graph defined by A - * might be different to the original graph - * To ensure a unique solution, we need to filter out the "prob0"-states. - * (Notice that new "prob1"-states do not harm as actual target states are always excluded, independent of the choice) - * - * @param solverthe solver that contains the two player matrices - * @param x The initial guess of the solution. + * Note that, depending on the policies, the qualitative properties of the graph defined by inducedA + * might be different to the original graph. + * + * @param solver the solver that contains the two player matrices + * @param b The vector in which to select the entries of the right hand side + * @param player1Policy A policy that selects rows in every rowgroup of player1. + * @param player2Policy A policy that selects rows in every rowgroup of player2. + * @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state + * @param inducedA the Matrix for the resulting equation system + * @param inducedB the Vector for the resulting equation system + * @param probGreater0States marks the states which have a positive probability to lead to a target state + * @return Induced A, b and targets + */ + template + void getInducedEquationSystem(storm::solver::GameSolver const& solver, + std::vector const& b, + std::vector const& player1Policy, + std::vector const& player2Policy, + storm::storage::BitVector const& targetChoices, + storm::storage::SparseMatrix& inducedA, + std::vector& inducedB, + storm::storage::BitVector& probGreater0States + ); + + /*! + * Constructs the equation system defined by the matrix inducedA and vector inducedB that result from applying + * the given policy to the matrix from the given solver and the given b. + * + * Note that, depending on the policies, the qualitative properties of the graph defined by inducedA + * might be different to the original graph. + * + * @param solver the solver that contains the matrix * @param b The vector in which to select the entries of the right hand side * @param policy A policy that selects rows in every rowgroup. - * @param targetChoices marks the rows in the matrix that have a positive probability to lead to a target state - * @param prob0Value the value that is assigned to the states that have probability zero to reach a target - * @return The solution vector in the form of the vector x. + * @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state + * @param inducedA the Matrix for the resulting equation system + * @param inducedB the Vector for the resulting equation system + * @param probGreater0States marks the states which have a positive probability to lead to a target state + * @return Induced A, b and targets */ template - void solveInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver const& solver, - std::vector& x, + void getInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver const& solver, std::vector const& b, std::vector const& policy, storm::storage::BitVector const& targetChoices, - ValueType const& prob0Value + storm::storage::SparseMatrix& inducedA, + std::vector& inducedB, + storm::storage::BitVector& probGreater0States + ); + + /*! + * Solves the given equation system. + * + * It is not assumed that qualitative properties of the Graph defined by A have been checked, yet. + * However, actual target states are already filtered out. + * To ensure a unique solution, we also need to filter out the "prob0"-states. + * + * @param A the matrix of the equation system + * @param x The initial guess of the solution. + * @param b The vector of the equation system + * @param targetChoices marks the rows in the matrix that have a positive probability to lead to a target state + * @param prob0Value the value that is assigned to the states that have probability zero to reach a target + * @return The solution vector in the form of the vector x. + */ + template + void solveLinearEquationSystem(storm::storage::SparseMatrixconst& A, + std::vector& x, + std::vector const& b, + storm::storage::BitVector const& probGreater0States, + ValueType const& prob0Value ); + /*! + * Checks if the given policies make choices that lead to states from which no target state is reachable ("prob0"-states). + * This can happen when value iteration is applied and there are multiple choices with the same value + * (e.g. a state that allows to chose a selfloop with probability one) + * + * If the policies are changed, they are updated accordingly (as well as the given inducedA, inducedB and probGreater0States) + * + * @param solver the solver that contains the two player matrices + * @param x the solution vector (the result from value iteration) + * @param b The vector in which to select the entries of the right hand side + * @param player1Policy A policy that selects rows in every rowgroup of player1. + * @param player2Policy A policy that selects rows in every rowgroup of player2. + * @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state + * @param inducedA the Matrix for the equation system + * @param inducedB the Vector for the equation system + * @param probGreater0States marks the states which have a positive probability to lead to a target state + * @return true iff there are no more prob0-states. Also changes the given policies accordingly + */ + template + bool checkAndFixPolicy(storm::solver::GameSolver const& solver, + std::vector const& x, + std::vector const& b, + std::vector& player1Policy, + std::vector& player2Policy, + storm::storage::BitVector const& targetChoices, + storm::storage::SparseMatrix& inducedA, + std::vector& inducedB, + storm::storage::BitVector& probGreater0States + ); + + /*! + * Checks if the given policies make choices that lead to states from which no target state is reachable ("prob0"-states). + * This can happen when value iteration is applied and there are multiple choices with the same value + * (e.g. a state that allows to chose a selfloop with probability one) + * + * If the policies are changed, they are updated accordingly (as well as the given inducedA, inducedB and probGreater0States) + * + * @param solver the solver that contains the two player matrices + * @param x the solution vector (the result from value iteration) + * @param b The vector in which to select the entries of the right hand side + * @param policy A policy that selects rows in every rowgroup. + * @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state + * @param inducedA the Matrix for the equation system + * @param inducedB the Vector for the equation system + * @param probGreater0States marks the states which have a positive probability to lead to a target state + * @return true iff there are no more prob0-states. Also changes the given policies accordingly + */ + template + bool checkAndFixPolicy(storm::solver::MinMaxLinearEquationSolver const& solver, + std::vector const& x, + std::vector const& b, + std::vector& policy, + storm::storage::BitVector const& targetChoices, + storm::storage::SparseMatrix& inducedA, + std::vector& inducedB, + storm::storage::BitVector& probGreater0States + ); + + //little helper function + template + bool rowLeadsToTarget(uint_fast64_t row, + storm::storage::SparseMatrix const& matrix, + storm::storage::BitVector const& targetChoices, + storm::storage::BitVector const& probGreater0States){ + if(targetChoices.get(row)) return true; + for(auto const& successor : matrix.getRow(row)){ + if(probGreater0States.get(successor.getColumn())) return true; + } + return false; + } } } }