Browse Source

Work on policy recycling

Former-commit-id: b8e94254ce
tempestpy_adaptions
TimQu 9 years ago
parent
commit
9c645ba24b
  1. 6
      src/modelchecker/region/ApproximationModel.cpp
  2. 9
      src/solver/AbstractGameSolver.cpp
  3. 2
      src/solver/AbstractGameSolver.h
  4. 9
      src/solver/MinMaxLinearEquationSolver.cpp
  5. 3
      src/solver/MinMaxLinearEquationSolver.h
  6. 9
      src/storage/SparseMatrix.cpp
  7. 9
      src/storage/SparseMatrix.h
  8. 368
      src/utility/policyguessing.cpp
  9. 174
      src/utility/policyguessing.h

6
src/modelchecker/region/ApproximationModel.cpp

@ -67,13 +67,13 @@ namespace storm {
this->vectorData.assignment.shrink_to_fit(); this->vectorData.assignment.shrink_to_fit();
if(parametricModel.getType()==storm::models::ModelType::Mdp){ if(parametricModel.getType()==storm::models::ModelType::Mdp){
initializePlayer1Matrix(parametricModel); initializePlayer1Matrix(parametricModel);
this->solverData.lastPlayer1Policy = Policy(this->solverData.player1Matrix.getRowGroupCount(), 0);
} }
this->solverData.result = std::vector<ConstantType>(maybeStates.getNumberOfSetBits(), this->computeRewards ? storm::utility::one<ConstantType>() : ConstantType(0.5)); this->solverData.result = std::vector<ConstantType>(maybeStates.getNumberOfSetBits(), this->computeRewards ? storm::utility::one<ConstantType>() : ConstantType(0.5));
this->solverData.initialStateIndex = newIndices[initialState]; this->solverData.initialStateIndex = newIndices[initialState];
this->solverData.lastMinimizingPolicy = Policy(this->matrixData.matrix.getRowGroupCount(), 0); this->solverData.lastMinimizingPolicy = Policy(this->matrixData.matrix.getRowGroupCount(), 0);
this->solverData.lastMaximizingPolicy = Policy(this->matrixData.matrix.getRowGroupCount(), 0); this->solverData.lastMaximizingPolicy = Policy(this->matrixData.matrix.getRowGroupCount(), 0);
this->solverData.lastPlayer1Policy = Policy(this->matrixData.matrix.getRowGroupCount(), 0);
} }
template<typename ParametricSparseModelType, typename ConstantType> template<typename ParametricSparseModelType, typename ConstantType>
@ -325,9 +325,9 @@ namespace storm {
} }
// std::cout << "Used Approximation" << std::endl; // std::cout << "Used Approximation" << std::endl;
for (auto const& varcount : VarCount){ for (auto const& varcount : VarCount){
if(varcount.second.first > 0 && varcount.second.second > 0){
// if(varcount.second.first > 0 && varcount.second.second > 0){
// std::cout << " Variable " << varcount.first << " has been set to lower " << varcount.second.first << " times and to upper " << varcount.second.second << " times. (total: " << substitutionCount << ")" << std::endl; // std::cout << " Variable " << varcount.first << " has been set to lower " << varcount.second.first << " times and to upper " << varcount.second.second << " times. (total: " << substitutionCount << ")" << std::endl;
}
// }
} }
for (auto const& fixVar : fixedVariables){ for (auto const& fixVar : fixedVariables){
//std::cout << " APPROXMODEL: variable " << fixVar.first << " is always mapped to " << fixVar.second << std::endl; //std::cout << " APPROXMODEL: variable " << fixVar.first << " is always mapped to " << fixVar.second << std::endl;

9
src/solver/AbstractGameSolver.cpp

@ -32,5 +32,14 @@ namespace storm {
assert(!this->player2Policy.empty()); assert(!this->player2Policy.empty());
return player2Policy; return player2Policy;
} }
double AbstractGameSolver::getPrecision() const {
return precision;
}
bool AbstractGameSolver::getRelative() const {
return relative;
}
} }
} }

2
src/solver/AbstractGameSolver.h

@ -30,6 +30,8 @@ namespace storm {
std::vector<storm::storage::sparse::state_type> getPlayer1Policy() const; std::vector<storm::storage::sparse::state_type> getPlayer1Policy() const;
std::vector<storm::storage::sparse::state_type> getPlayer2Policy() const; std::vector<storm::storage::sparse::state_type> getPlayer2Policy() const;
double getPrecision() const;
bool getRelative() const;
protected: protected:
// The precision to achieve. // The precision to achieve.

9
src/solver/MinMaxLinearEquationSolver.cpp

@ -27,5 +27,14 @@ namespace storm {
assert(!policy.empty()); assert(!policy.empty());
return policy; return policy;
} }
double AbstractMinMaxLinearEquationSolver::getPrecision() const {
return precision;
}
bool AbstractMinMaxLinearEquationSolver::getRelative() const {
return relative;
}
} }
} }

3
src/solver/MinMaxLinearEquationSolver.h

@ -27,6 +27,9 @@ namespace storm {
std::vector<storm::storage::sparse::state_type> getPolicy() const; std::vector<storm::storage::sparse::state_type> getPolicy() const;
double getPrecision() const;
bool getRelative() const;
void setOptimizationDirection(OptimizationDirection d) { void setOptimizationDirection(OptimizationDirection d) {
direction = convert(d); direction = convert(d);
} }

9
src/storage/SparseMatrix.cpp

@ -999,6 +999,15 @@ namespace storm {
} }
#endif #endif
template<typename ValueType>
ValueType SparseMatrix<ValueType>::multiplyRowWithVector(index_type row, std::vector<ValueType> const& vector) const {
ValueType result = storm::utility::zero<ValueType>();
for(auto const& entry : this->getRow(row)){
result += entry.getValue() * vector[entry.getColumn()];
}
return result;
}
template<typename ValueType> template<typename ValueType>
void SparseMatrix<ValueType>::performSuccessiveOverRelaxationStep(ValueType omega, std::vector<ValueType>& x, std::vector<ValueType> const& b) const { void SparseMatrix<ValueType>::performSuccessiveOverRelaxationStep(ValueType omega, std::vector<ValueType>& x, std::vector<ValueType> const& b) const {
const_iterator it = this->begin(); const_iterator it = this->begin();

9
src/storage/SparseMatrix.h

@ -706,6 +706,15 @@ namespace storm {
*/ */
void multiplyWithVector(std::vector<value_type> const& vector, std::vector<value_type>& result) const; void multiplyWithVector(std::vector<value_type> const& vector, std::vector<value_type>& result) const;
/*!
* Multiplies a single row of the matrix with the given vector and returns the result
*
* @param row The index of the row with which to multiply
* @param vector The vector with which to multiply the row.
* @return the result of the multiplication.
*/
value_type multiplyRowWithVector(index_type row, std::vector<value_type> const& vector) const;
/*! /*!
* Multiplies the vector to the matrix from the left and writes the result to the given result vector. * Multiplies the vector to the matrix from the left and writes the result to the given result vector.
* *

368
src/utility/policyguessing.cpp

@ -8,12 +8,11 @@
#include "src/utility/policyguessing.h" #include "src/utility/policyguessing.h"
#include "src/storage/SparseMatrix.h"
#include "src/utility/macros.h" #include "src/utility/macros.h"
#include "src/utility/solver.h" #include "src/utility/solver.h"
#include "src/solver/LinearEquationSolver.h" #include "src/solver/LinearEquationSolver.h"
#include "src/solver/GameSolver.h"
#include "graph.h" #include "graph.h"
#include "ConstantsComparator.h"
namespace storm { namespace storm {
namespace utility{ namespace utility{
@ -31,23 +30,119 @@ namespace storm {
ValueType const& prob0Value ValueType const& prob0Value
){ ){
solveInducedEquationSystem(solver, x, b, player1Policy, player2Policy, targetChoices, prob0Value);
// std::vector<storm::storage::sparse::state_type> pl1Policy = player1Policy;
// std::vector<storm::storage::sparse::state_type> pl2Policy = player2Policy;
storm::storage::SparseMatrix<ValueType> inducedA;
std::vector<ValueType> inducedB;
storm::storage::BitVector probGreater0States;
getInducedEquationSystem(solver, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States);
solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
solver.setPolicyTracking(); solver.setPolicyTracking();
bool resultCorrect = false;
while(!resultCorrect){
solver.solveGame(player1Goal, player2Goal, x, b); solver.solveGame(player1Goal, player2Goal, x, b);
player1Policy = solver.getPlayer1Policy(); player1Policy = solver.getPlayer1Policy();
player2Policy = solver.getPlayer2Policy(); player2Policy = solver.getPlayer2Policy();
//Check if the policies makes choices that lead to states from which no target state is reachable ("prob0"-states).
getInducedEquationSystem(solver, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States);
resultCorrect = checkAndFixPolicy(solver, x, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States);
if(!resultCorrect){
//If the policy could not be fixed, it indicates that our guessed values were to high.
STORM_LOG_WARN("Policies could not be fixed. Restarting Gamesolver. ");
solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
//x = std::vector<ValueType>(x.size(), storm::utility::zero<ValueType>());
}
}
/*
std::size_t p2Precount=0;
std::size_t p2Postcount=0;
std::size_t p1diff =0;
std::size_t p2diff =0;
std::size_t p2RelevantCount=0;
storm::storage::BitVector relevantP2Groups(pl2Policy.size(),false);
for(std::size_t i = 0; i<pl1Policy.size(); ++i){
if(pl1Policy[i] != player1Policy[i]){
++p1diff;
}
std::size_t row = solver.getPlayer1Matrix().getRowGroupIndices()[i] + player1Policy[i];
auto rowObj = solver.getPlayer1Matrix().getRow(row);
relevantP2Groups.set(rowObj.begin()->getColumn());
}
for (std::size_t i : relevantP2Groups){
if(pl2Policy[i] != player2Policy[i]){
++p2RelevantCount;
}
} }
for(std::size_t i = 0; i<pl2Policy.size(); ++i){
if(pl2Policy[i] != player2Policy[i]){
++p2diff;
}
p2Precount += pl2Policy[i];
p2Postcount += player2Policy[i];
}
std::cout << "P1: " << (player1Goal == OptimizationDirection::Minimize ? "MIN " : "MAX ");
std::cout << "P2: " << (player2Goal == OptimizationDirection::Minimize ? "MIN " : "MAX ");
std::cout << "Changes: P1: " << p1diff;
std::cout << " P2: " << p2diff << " (" << p2RelevantCount << " relevant)";
std::cout << " Counts P2: " << p2Precount << " and " << p2Postcount << ".";
std::cout << std::endl;
*/
}
template <typename ValueType> template <typename ValueType>
void solveInducedEquationSystem(storm::solver::GameSolver<ValueType> const& solver,
void solveMinMaxLinearEquationSystem( storm::solver::MinMaxLinearEquationSolver<ValueType>& solver,
std::vector<ValueType>& x, std::vector<ValueType>& x,
std::vector<ValueType> const& b,
OptimizationDirection goal,
std::vector<storm::storage::sparse::state_type>& policy,
storm::storage::BitVector const& targetChoices,
ValueType const& prob0Value
){
storm::storage::SparseMatrix<ValueType> inducedA;
std::vector<ValueType> inducedB;
storm::storage::BitVector probGreater0States;
getInducedEquationSystem(solver, b, policy, targetChoices, inducedA, inducedB, probGreater0States);
solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
solver.setPolicyTracking();
bool resultCorrect = false;
while(!resultCorrect){
solver.solveEquationSystem(goal, x, b);
policy = solver.getPolicy();
//Check if the policy makes choices that lead to states from which no target state is reachable ("prob0"-states).
getInducedEquationSystem(solver, b, policy, targetChoices, inducedA, inducedB, probGreater0States);
resultCorrect = checkAndFixPolicy(solver, x, b, policy, targetChoices, inducedA, inducedB, probGreater0States);
if(!resultCorrect){
//If the policy could not be fixed, it indicates that our guessed values were to high.
STORM_LOG_WARN("Policy could not be fixed. Restarting MinMaxsolver." );
solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
//x = std::vector<ValueType>(x.size(), storm::utility::zero<ValueType>());
}
}
}
template <typename ValueType>
void getInducedEquationSystem(storm::solver::GameSolver<ValueType> const& solver,
std::vector<ValueType> const& b, std::vector<ValueType> const& b,
std::vector<storm::storage::sparse::state_type> const& player1Policy, std::vector<storm::storage::sparse::state_type> const& player1Policy,
std::vector<storm::storage::sparse::state_type> const& player2Policy, std::vector<storm::storage::sparse::state_type> const& player2Policy,
storm::storage::BitVector const& targetChoices, storm::storage::BitVector const& targetChoices,
ValueType const& prob0Value){
uint_fast64_t numberOfPlayer1States = x.size();
storm::storage::SparseMatrix<ValueType>& inducedA,
std::vector<ValueType>& inducedB,
storm::storage::BitVector& probGreater0States
){
uint_fast64_t numberOfPlayer1States = solver.getPlayer1Matrix().getRowGroupCount();
//Get the rows of the player2matrix that are selected by the policies //Get the rows of the player2matrix that are selected by the policies
//Note that rows can be selected more then once and in an arbitrary order. //Note that rows can be selected more then once and in an arbitrary order.
@ -58,10 +153,9 @@ namespace storm {
uint_fast64_t pl2State = pl1Row.begin()->getColumn(); uint_fast64_t pl2State = pl1Row.begin()->getColumn();
selectedRows[pl1State] = solver.getPlayer2Matrix().getRowGroupIndices()[pl2State] + player2Policy[pl2State]; selectedRows[pl1State] = solver.getPlayer2Matrix().getRowGroupIndices()[pl2State] + player2Policy[pl2State];
} }
//Get the matrix A, vector b, and the targetStates induced by this selection //Get the matrix A, vector b, and the targetStates induced by this selection
storm::storage::SparseMatrix<ValueType> inducedA = solver.getPlayer2Matrix().selectRowsFromRowIndexSequence(selectedRows, false);
std::vector<ValueType> inducedB(numberOfPlayer1States);
inducedA = solver.getPlayer2Matrix().selectRowsFromRowIndexSequence(selectedRows, false);
inducedB = std::vector<ValueType>(numberOfPlayer1States);
storm::utility::vector::selectVectorValues<ValueType>(inducedB, selectedRows, b); storm::utility::vector::selectVectorValues<ValueType>(inducedB, selectedRows, b);
storm::storage::BitVector inducedTarget(numberOfPlayer1States, false); storm::storage::BitVector inducedTarget(numberOfPlayer1States, false);
for (uint_fast64_t pl1State = 0; pl1State < numberOfPlayer1States; ++pl1State){ for (uint_fast64_t pl1State = 0; pl1State < numberOfPlayer1States; ++pl1State){
@ -69,18 +163,50 @@ namespace storm {
inducedTarget.set(pl1State); inducedTarget.set(pl1State);
} }
} }
//Find the states from which no target state is reachable.
probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(numberOfPlayer1States, true), inducedTarget);
}
template <typename ValueType>
void getInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver<ValueType> const& solver,
std::vector<ValueType> const& b,
std::vector<storm::storage::sparse::state_type> const& policy,
storm::storage::BitVector const& targetChoices,
storm::storage::SparseMatrix<ValueType>& inducedA,
std::vector<ValueType>& inducedB,
storm::storage::BitVector& probGreater0States
){
uint_fast64_t numberOfStates = solver.getMatrix().getRowGroupCount();
//Get the matrix A, vector b, and the targetStates induced by the policy
inducedA = solver.getMatrix().selectRowsFromRowGroups(policy, false);
inducedB = std::vector<ValueType>(numberOfStates);
storm::utility::vector::selectVectorValues<ValueType>(inducedB, policy, solver.getMatrix().getRowGroupIndices(), b);
storm::storage::BitVector inducedTarget(numberOfStates, false);
for (uint_fast64_t state = 0; state < numberOfStates; ++state){
if(targetChoices.get(solver.getMatrix().getRowGroupIndices()[state] + policy[state])){
inducedTarget.set(state);
}
}
//Find the states from which no target state is reachable. //Find the states from which no target state is reachable.
//Note that depending on the policies, qualitative properties might have changed which makes this step necessary.
storm::storage::BitVector probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(numberOfPlayer1States, true), inducedTarget);
probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(numberOfStates, true), inducedTarget);
}
//Get the final A,x, and b and invoke linear equation solver
storm::storage::SparseMatrix<ValueType> subA = inducedA.getSubmatrix(true, probGreater0States, probGreater0States, true);
template<typename ValueType>
void solveLinearEquationSystem(storm::storage::SparseMatrix<ValueType>const& A,
std::vector<ValueType>& x,
std::vector<ValueType> const& b,
storm::storage::BitVector const& probGreater0States,
ValueType const& prob0Value
){
//Get the submatrix/subvector A,x, and b and invoke linear equation solver
storm::storage::SparseMatrix<ValueType> subA = A.getSubmatrix(true, probGreater0States, probGreater0States, true);
subA.convertToEquationSystem(); subA.convertToEquationSystem();
std::vector<ValueType> subX(probGreater0States.getNumberOfSetBits()); std::vector<ValueType> subX(probGreater0States.getNumberOfSetBits());
storm::utility::vector::selectVectorValues(subX, probGreater0States, x); storm::utility::vector::selectVectorValues(subX, probGreater0States, x);
std::vector<ValueType> subB(probGreater0States.getNumberOfSetBits()); std::vector<ValueType> subB(probGreater0States.getNumberOfSetBits());
storm::utility::vector::selectVectorValues(subB, probGreater0States, inducedB);
storm::utility::vector::selectVectorValues(subB, probGreater0States, b);
std::unique_ptr<storm::solver::LinearEquationSolver<ValueType>> linEqSysSolver = storm::utility::solver::LinearEquationSolverFactory<ValueType>().create(subA); std::unique_ptr<storm::solver::LinearEquationSolver<ValueType>> linEqSysSolver = storm::utility::solver::LinearEquationSolverFactory<ValueType>().create(subA);
linEqSysSolver->solveEquationSystem(subX, subB); linEqSysSolver->solveEquationSystem(subX, subB);
@ -91,60 +217,146 @@ namespace storm {
template <typename ValueType> template <typename ValueType>
void solveMinMaxLinearEquationSystem( storm::solver::MinMaxLinearEquationSolver<ValueType>& solver,
std::vector<ValueType>& x,
bool checkAndFixPolicy(storm::solver::GameSolver<ValueType> const& solver,
std::vector<ValueType> const& x,
std::vector<ValueType> const& b, std::vector<ValueType> const& b,
OptimizationDirection goal,
std::vector<storm::storage::sparse::state_type>& policy,
std::vector<storm::storage::sparse::state_type>& player1Policy,
std::vector<storm::storage::sparse::state_type>& player2Policy,
storm::storage::BitVector const& targetChoices, storm::storage::BitVector const& targetChoices,
ValueType const& prob0Value
storm::storage::SparseMatrix<ValueType>& inducedA,
std::vector<ValueType>& inducedB,
storm::storage::BitVector& probGreater0States
){ ){
if(probGreater0States.getNumberOfSetBits() == probGreater0States.size()) return true;
solveInducedEquationSystem(solver, x, b, policy, targetChoices, prob0Value);
solver.setPolicyTracking();
solver.solveEquationSystem(goal, x, b);
policy = solver.getPolicy();
bool policyChanged = true;
while(policyChanged){
/*
* Lets try to fix the issue by doing other choices that are equally good.
* We change the policy in a state if the following conditions apply:
* 1. The current choice does not lead to target
* 2. There is another choice that leads to target
* 3. The value of that choice is equal to the value of the choice given by the policy
* Note that the values of the result will not change this way.
* We do this unil the policy does not change anymore
*/
policyChanged = false;
//Player 1:
for(uint_fast64_t pl1State=0; pl1State < player1Policy.size(); ++pl1State){
uint_fast64_t pl1RowGroupIndex = solver.getPlayer1Matrix().getRowGroupIndices()[pl1State];
//Check 1.: The current choice does not lead to target
if(!probGreater0States.get(pl1State)){
//1. Is satisfied. Check 2.: There is another choice that leads to target
ValueType choiceValue = x[pl1State];
for(uint_fast64_t otherChoice = 0; otherChoice < solver.getPlayer1Matrix().getRowGroupSize(pl1State); ++otherChoice){
if(otherChoice == player1Policy[pl1State]) continue;
//the otherChoice selects a player2 state in which player2 makes his choice (according to the player2Policy).
uint_fast64_t pl2State = solver.getPlayer1Matrix().getRow(pl1RowGroupIndex + otherChoice).begin()->getColumn();
uint_fast64_t pl2Row = solver.getPlayer2Matrix().getRowGroupIndices()[pl2State] + player2Policy[pl2State];
if(rowLeadsToTarget(pl2Row, solver.getPlayer2Matrix(), targetChoices, probGreater0States)){
//2. is satisfied. Check 3. The value of that choice is equal to the value of the choice given by the policy
ValueType otherValue = solver.getPlayer2Matrix().multiplyRowWithVector(pl2Row, x) + b[pl2Row];
if(storm::utility::vector::equalModuloPrecision(choiceValue, otherValue, solver.getPrecision(), !solver.getRelative())){
//3. is satisfied.
player1Policy[pl1State] = otherChoice;
probGreater0States.set(pl1State);
policyChanged = true;
break; //no need to check other choices
}
}
}
}
}
//update probGreater0States
probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(probGreater0States.size(), true), probGreater0States);
//Player 2:
for(uint_fast64_t pl2State=0; pl2State < player2Policy.size(); ++pl2State){
uint_fast64_t pl2RowGroupIndex = solver.getPlayer2Matrix().getRowGroupIndices()[pl2State];
//Check 1.: The current choice does not lead to target
if(!rowLeadsToTarget(pl2RowGroupIndex + player2Policy[pl2State], solver.getPlayer2Matrix(), targetChoices, probGreater0States)){
//1. Is satisfied. Check 2. There is another choice that leads to target
ValueType choiceValue = solver.getPlayer2Matrix().multiplyRowWithVector(pl2RowGroupIndex + player2Policy[pl2State], x) + b[pl2RowGroupIndex + player2Policy[pl2State]];
for(uint_fast64_t otherChoice = 0; otherChoice < solver.getPlayer2Matrix().getRowGroupSize(pl2State); ++otherChoice){
if(otherChoice == player2Policy[pl2State]) continue;
if(rowLeadsToTarget(pl2RowGroupIndex + otherChoice, solver.getPlayer2Matrix(), targetChoices, probGreater0States)){
//2. is satisfied. Check 3. The value of that choice is equal to the value of the choice given by the policy
ValueType otherValue = solver.getPlayer2Matrix().multiplyRowWithVector(pl2RowGroupIndex + otherChoice, x) + b[pl2RowGroupIndex + otherChoice];
if(storm::utility::vector::equalModuloPrecision(choiceValue, otherValue, solver.getPrecision(), solver.getRelative())){
//3. is satisfied.
player2Policy[pl2State] = otherChoice;
policyChanged = true;
break; //no need to check other choices
}
}
}
}
} }
//update probGreater0States
getInducedEquationSystem(solver, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States);
if(probGreater0States.getNumberOfSetBits() == probGreater0States.size()){
return true;
}
}
//Reaching this point means that the policy does not change anymore and we could not fix it.
return false;
}
template <typename ValueType> template <typename ValueType>
void solveInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver<ValueType> const& solver,
std::vector<ValueType>& x,
bool checkAndFixPolicy(storm::solver::MinMaxLinearEquationSolver<ValueType> const& solver,
std::vector<ValueType> const& x,
std::vector<ValueType> const& b, std::vector<ValueType> const& b,
std::vector<storm::storage::sparse::state_type> const& policy,
std::vector<storm::storage::sparse::state_type>& policy,
storm::storage::BitVector const& targetChoices, storm::storage::BitVector const& targetChoices,
ValueType const& prob0Value
storm::storage::SparseMatrix<ValueType>& inducedA,
std::vector<ValueType>& inducedB,
storm::storage::BitVector& probGreater0States
){ ){
uint_fast64_t numberOfStates = x.size();
if(probGreater0States.getNumberOfSetBits() == probGreater0States.size()) return true;
//Get the matrix A, vector b, and the targetStates induced by the policy
storm::storage::SparseMatrix<ValueType> inducedA = solver.getMatrix().selectRowsFromRowGroups(policy, false);
std::vector<ValueType> inducedB(numberOfStates);
storm::utility::vector::selectVectorValues<ValueType>(inducedB, policy, solver.getMatrix().getRowGroupIndices(), b);
storm::storage::BitVector inducedTarget(numberOfStates, false);
for (uint_fast64_t state = 0; state < numberOfStates; ++state){
if(targetChoices.get(solver.getMatrix().getRowGroupIndices()[state] + policy[state])){
inducedTarget.set(state);
bool policyChanged = true;
while(policyChanged){
/*
* Lets try to fix the issue by doing other choices that are equally good.
* We change the policy in a state if the following conditions apply:
* 1. The current choice does not lead to target
* 2. There is another choice that leads to target
* 3. The value of that choice is equal to the value of the choice given by the policy
* Note that the values of the result will not change this way.
* We do this unil the policy does not change anymore
*/
policyChanged = false;
for(uint_fast64_t state=0; state < policy.size(); ++state){
uint_fast64_t rowGroupIndex = solver.getMatrix().getRowGroupIndices()[state];
//Check 1.: The current choice does not lead to target
if(!probGreater0States.get(state)){
//1. Is satisfied. Check 2.: There is another choice that leads to target
ValueType choiceValue = x[state];
for(uint_fast64_t otherChoice = 0; otherChoice < solver.getMatrix().getRowGroupSize(state); ++otherChoice){
if(otherChoice == policy[state]) continue;
if(rowLeadsToTarget(rowGroupIndex + otherChoice, solver.getMatrix(), targetChoices, probGreater0States)){
//2. is satisfied. Check 3. The value of that choice is equal to the value of the choice given by the policy
ValueType otherValue = solver.getMatrix().multiplyRowWithVector(rowGroupIndex + otherChoice, x) + b[rowGroupIndex + otherChoice];
if(storm::utility::vector::equalModuloPrecision(choiceValue, otherValue, solver.getPrecision(), !solver.getRelative())){
//3. is satisfied.
policy[state] = otherChoice;
probGreater0States.set(state);
policyChanged = true;
break; //no need to check other choices
}
}
}
} }
} }
//Find the states from which no target state is reachable.
//Note that depending on the policies, qualitative properties might have changed which makes this step necessary.
storm::storage::BitVector probGreater0States = storm::utility::graph::performProbGreater0(inducedA.transpose(), storm::storage::BitVector(numberOfStates, true), inducedTarget);
//Get the final A,x, and b and invoke linear equation solver
storm::storage::SparseMatrix<ValueType> subA = inducedA.getSubmatrix(true, probGreater0States, probGreater0States, true);
subA.convertToEquationSystem();
std::vector<ValueType> subX(probGreater0States.getNumberOfSetBits());
storm::utility::vector::selectVectorValues(subX, probGreater0States, x);
std::vector<ValueType> subB(probGreater0States.getNumberOfSetBits());
storm::utility::vector::selectVectorValues(subB, probGreater0States, inducedB);
std::unique_ptr<storm::solver::LinearEquationSolver<ValueType>> linEqSysSolver = storm::utility::solver::LinearEquationSolverFactory<ValueType>().create(subA);
linEqSysSolver->solveEquationSystem(subX, subB);
//fill in the result
storm::utility::vector::setVectorValues(x, probGreater0States, subX);
storm::utility::vector::setVectorValues(x, (~probGreater0States), prob0Value);
//update probGreater0States and equation system
getInducedEquationSystem(solver, b, policy, targetChoices, inducedA, inducedB, probGreater0States);
if(probGreater0States.getNumberOfSetBits() == probGreater0States.size()){
return true;
}
}
//Reaching this point means that the policy does not change anymore and we could not fix it.
return false;
} }
@ -159,32 +371,62 @@ namespace storm {
double const& prob0Value double const& prob0Value
); );
template void solveInducedEquationSystem<double>(storm::solver::GameSolver<double> const& solver,
template void solveMinMaxLinearEquationSystem<double>( storm::solver::MinMaxLinearEquationSolver<double>& solver,
std::vector<double>& x, std::vector<double>& x,
std::vector<double> const& b, std::vector<double> const& b,
std::vector<storm::storage::sparse::state_type> const& player1Policy,
std::vector<storm::storage::sparse::state_type> const& player2Policy,
OptimizationDirection goal,
std::vector<storm::storage::sparse::state_type>& policy,
storm::storage::BitVector const& targetChoices, storm::storage::BitVector const& targetChoices,
double const& prob0Value double const& prob0Value
); );
template void solveMinMaxLinearEquationSystem<double>( storm::solver::MinMaxLinearEquationSolver<double>& solver,
std::vector<double>& x,
template void getInducedEquationSystem<double>(storm::solver::GameSolver<double> const& solver,
std::vector<double> const& b, std::vector<double> const& b,
OptimizationDirection goal,
std::vector<storm::storage::sparse::state_type>& policy,
std::vector<storm::storage::sparse::state_type> const& player1Policy,
std::vector<storm::storage::sparse::state_type> const& player2Policy,
storm::storage::BitVector const& targetChoices, storm::storage::BitVector const& targetChoices,
double const& prob0Value
storm::storage::SparseMatrix<double>& inducedA,
std::vector<double>& inducedB,
storm::storage::BitVector& probGreater0States
); );
template void solveInducedEquationSystem<double>(storm::solver::MinMaxLinearEquationSolver<double> const& solver,
std::vector<double>& x,
template void getInducedEquationSystem<double>(storm::solver::MinMaxLinearEquationSolver<double> const& solver,
std::vector<double> const& b, std::vector<double> const& b,
std::vector<storm::storage::sparse::state_type> const& policy, std::vector<storm::storage::sparse::state_type> const& policy,
storm::storage::BitVector const& targetChoices, storm::storage::BitVector const& targetChoices,
storm::storage::SparseMatrix<double>& inducedA,
std::vector<double>& inducedB,
storm::storage::BitVector& probGreater0States
);
template void solveLinearEquationSystem<double>(storm::storage::SparseMatrix<double>const& A,
std::vector<double>& x,
std::vector<double> const& b,
storm::storage::BitVector const& probGreater0States,
double const& prob0Value double const& prob0Value
); );
template bool checkAndFixPolicy<double>(storm::solver::GameSolver<double> const& solver,
std::vector<double> const& x,
std::vector<double> const& b,
std::vector<storm::storage::sparse::state_type>& player1Policy,
std::vector<storm::storage::sparse::state_type>& player2Policy,
storm::storage::BitVector const& targetChoices,
storm::storage::SparseMatrix<double>& inducedA,
std::vector<double>& inducedB,
storm::storage::BitVector& probGreater0States
);
template bool checkAndFixPolicy<double>(storm::solver::MinMaxLinearEquationSolver<double> const& solver,
std::vector<double> const& x,
std::vector<double> const& b,
std::vector<storm::storage::sparse::state_type>& policy,
storm::storage::BitVector const& targetChoices,
storm::storage::SparseMatrix<double>& inducedA,
std::vector<double>& inducedB,
storm::storage::BitVector& probGreater0States
);
} }
} }
} }

174
src/utility/policyguessing.h

@ -18,6 +18,7 @@
#include "src/utility/vector.h" #include "src/utility/vector.h"
#include "src/storage/BitVector.h" #include "src/storage/BitVector.h"
#include "src/storage/sparse/StateType.h" #include "src/storage/sparse/StateType.h"
#include "src/storage/SparseMatrix.h"
namespace storm { namespace storm {
namespace utility{ namespace utility{
@ -57,37 +58,6 @@ namespace storm {
ValueType const& prob0Value ValueType const& prob0Value
); );
/*!
* Solves the equation system defined by the matrix A and vector b' that result from applying
* the given policies to the matrices of the two players and the given b.
*
* Note that, depending on the policies, the qualitative properties of the graph defined by A
* might be different to the original graph of the game.
* To ensure a unique solution, we need to filter out the "prob0"-states.
* (Notice that new "prob1"-states do not harm as actual target states are always excluded, independent of the choice)
*
* @param solver the solver that contains the two player matrices
* @param x The initial guess of the solution.
* @param b The vector in which to select the entries of the right hand side
* @param player1Policy A policy that selects rows in every rowgroup of player1.
* @param player2Policy A policy that selects rows in every rowgroup of player2.
* @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state
* @param prob0Value the value that is assigned to the states that have probability zero to reach a target
* @return The solution vector in the form of the vector x.
*/
template<typename ValueType>
void solveInducedEquationSystem(storm::solver::GameSolver<ValueType> const& solver,
std::vector<ValueType>& x,
std::vector<ValueType> const& b,
std::vector<storm::storage::sparse::state_type> const& player1Policy,
std::vector<storm::storage::sparse::state_type> const& player2Policy,
storm::storage::BitVector const& targetChoices,
ValueType const& prob0Value
);
/*! /*!
* invokes the given MinMaxLinearEquationSolver. * invokes the given MinMaxLinearEquationSolver.
* *
@ -100,7 +70,7 @@ namespace storm {
* To identify these states and set the result for them correctly, it is necessary to know whether rewards or probabilities are to be computed * To identify these states and set the result for them correctly, it is necessary to know whether rewards or probabilities are to be computed
* *
* @param solver the solver that contains the two player matrices
* @param solver the solver that contains the matrix
* @param x The initial guess of the solution. * @param x The initial guess of the solution.
* @param b The vector to add after matrix-vector multiplication. * @param b The vector to add after matrix-vector multiplication.
* @param goal Sets whether we want to minimize or maximize. * @param goal Sets whether we want to minimize or maximize.
@ -120,31 +90,151 @@ namespace storm {
); );
/*! /*!
* Solves the equation system defined by the matrix A and vector b' that result from applying
* the given policy to the matrices of the two players and the given b.
* Constructs the equation system defined by the matrix inducedA and vector inducedB that result from applying
* the given policies to the matrices of the two players and the given b.
* *
* Note that, depending on the policy, the qualitative properties of the graph defined by A
* might be different to the original graph
* To ensure a unique solution, we need to filter out the "prob0"-states.
* (Notice that new "prob1"-states do not harm as actual target states are always excluded, independent of the choice)
* Note that, depending on the policies, the qualitative properties of the graph defined by inducedA
* might be different to the original graph.
* *
* @param solver the solver that contains the two player matrices * @param solver the solver that contains the two player matrices
* @param x The initial guess of the solution.
* @param b The vector in which to select the entries of the right hand side
* @param player1Policy A policy that selects rows in every rowgroup of player1.
* @param player2Policy A policy that selects rows in every rowgroup of player2.
* @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state
* @param inducedA the Matrix for the resulting equation system
* @param inducedB the Vector for the resulting equation system
* @param probGreater0States marks the states which have a positive probability to lead to a target state
* @return Induced A, b and targets
*/
template<typename ValueType>
void getInducedEquationSystem(storm::solver::GameSolver<ValueType> const& solver,
std::vector<ValueType> const& b,
std::vector<storm::storage::sparse::state_type> const& player1Policy,
std::vector<storm::storage::sparse::state_type> const& player2Policy,
storm::storage::BitVector const& targetChoices,
storm::storage::SparseMatrix<ValueType>& inducedA,
std::vector<ValueType>& inducedB,
storm::storage::BitVector& probGreater0States
);
/*!
* Constructs the equation system defined by the matrix inducedA and vector inducedB that result from applying
* the given policy to the matrix from the given solver and the given b.
*
* Note that, depending on the policies, the qualitative properties of the graph defined by inducedA
* might be different to the original graph.
*
* @param solver the solver that contains the matrix
* @param b The vector in which to select the entries of the right hand side * @param b The vector in which to select the entries of the right hand side
* @param policy A policy that selects rows in every rowgroup. * @param policy A policy that selects rows in every rowgroup.
* @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state
* @param inducedA the Matrix for the resulting equation system
* @param inducedB the Vector for the resulting equation system
* @param probGreater0States marks the states which have a positive probability to lead to a target state
* @return Induced A, b and targets
*/
template<typename ValueType>
void getInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver<ValueType> const& solver,
std::vector<ValueType> const& b,
std::vector<storm::storage::sparse::state_type> const& policy,
storm::storage::BitVector const& targetChoices,
storm::storage::SparseMatrix<ValueType>& inducedA,
std::vector<ValueType>& inducedB,
storm::storage::BitVector& probGreater0States
);
/*!
* Solves the given equation system.
*
* It is not assumed that qualitative properties of the Graph defined by A have been checked, yet.
* However, actual target states are already filtered out.
* To ensure a unique solution, we also need to filter out the "prob0"-states.
*
* @param A the matrix of the equation system
* @param x The initial guess of the solution.
* @param b The vector of the equation system
* @param targetChoices marks the rows in the matrix that have a positive probability to lead to a target state * @param targetChoices marks the rows in the matrix that have a positive probability to lead to a target state
* @param prob0Value the value that is assigned to the states that have probability zero to reach a target * @param prob0Value the value that is assigned to the states that have probability zero to reach a target
* @return The solution vector in the form of the vector x. * @return The solution vector in the form of the vector x.
*/ */
template<typename ValueType> template<typename ValueType>
void solveInducedEquationSystem(storm::solver::MinMaxLinearEquationSolver<ValueType> const& solver,
void solveLinearEquationSystem(storm::storage::SparseMatrix<ValueType>const& A,
std::vector<ValueType>& x, std::vector<ValueType>& x,
std::vector<ValueType> const& b, std::vector<ValueType> const& b,
std::vector<storm::storage::sparse::state_type> const& policy,
storm::storage::BitVector const& targetChoices,
storm::storage::BitVector const& probGreater0States,
ValueType const& prob0Value ValueType const& prob0Value
); );
/*!
* Checks if the given policies make choices that lead to states from which no target state is reachable ("prob0"-states).
* This can happen when value iteration is applied and there are multiple choices with the same value
* (e.g. a state that allows to chose a selfloop with probability one)
*
* If the policies are changed, they are updated accordingly (as well as the given inducedA, inducedB and probGreater0States)
*
* @param solver the solver that contains the two player matrices
* @param x the solution vector (the result from value iteration)
* @param b The vector in which to select the entries of the right hand side
* @param player1Policy A policy that selects rows in every rowgroup of player1.
* @param player2Policy A policy that selects rows in every rowgroup of player2.
* @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state
* @param inducedA the Matrix for the equation system
* @param inducedB the Vector for the equation system
* @param probGreater0States marks the states which have a positive probability to lead to a target state
* @return true iff there are no more prob0-states. Also changes the given policies accordingly
*/
template<typename ValueType>
bool checkAndFixPolicy(storm::solver::GameSolver<ValueType> const& solver,
std::vector<ValueType> const& x,
std::vector<ValueType> const& b,
std::vector<storm::storage::sparse::state_type>& player1Policy,
std::vector<storm::storage::sparse::state_type>& player2Policy,
storm::storage::BitVector const& targetChoices,
storm::storage::SparseMatrix<ValueType>& inducedA,
std::vector<ValueType>& inducedB,
storm::storage::BitVector& probGreater0States
);
/*!
* Checks if the given policies make choices that lead to states from which no target state is reachable ("prob0"-states).
* This can happen when value iteration is applied and there are multiple choices with the same value
* (e.g. a state that allows to chose a selfloop with probability one)
*
* If the policies are changed, they are updated accordingly (as well as the given inducedA, inducedB and probGreater0States)
*
* @param solver the solver that contains the two player matrices
* @param x the solution vector (the result from value iteration)
* @param b The vector in which to select the entries of the right hand side
* @param policy A policy that selects rows in every rowgroup.
* @param targetChoices marks the choices in the player2 matrix that have a positive probability to lead to a target state
* @param inducedA the Matrix for the equation system
* @param inducedB the Vector for the equation system
* @param probGreater0States marks the states which have a positive probability to lead to a target state
* @return true iff there are no more prob0-states. Also changes the given policies accordingly
*/
template<typename ValueType>
bool checkAndFixPolicy(storm::solver::MinMaxLinearEquationSolver<ValueType> const& solver,
std::vector<ValueType> const& x,
std::vector<ValueType> const& b,
std::vector<storm::storage::sparse::state_type>& policy,
storm::storage::BitVector const& targetChoices,
storm::storage::SparseMatrix<ValueType>& inducedA,
std::vector<ValueType>& inducedB,
storm::storage::BitVector& probGreater0States
);
//little helper function
template<typename ValueType>
bool rowLeadsToTarget(uint_fast64_t row,
storm::storage::SparseMatrix<ValueType> const& matrix,
storm::storage::BitVector const& targetChoices,
storm::storage::BitVector const& probGreater0States){
if(targetChoices.get(row)) return true;
for(auto const& successor : matrix.getRow(row)){
if(probGreater0States.get(successor.getColumn())) return true;
}
return false;
}
} }
} }
} }

Loading…
Cancel
Save