Browse Source

Optimization for policy recycling

Former-commit-id: b66a74b746
tempestpy_adaptions
TimQu 9 years ago
parent
commit
f3f3a31e1e
  1. 12
      src/modelchecker/region/ApproximationModel.cpp
  2. 32
      src/utility/policyguessing.cpp
  3. 4
      src/utility/policyguessing.h

12
src/modelchecker/region/ApproximationModel.cpp

@ -283,7 +283,7 @@ namespace storm {
Policy& policy = computeLowerBounds ? this->solverData.lastMinimizingPolicy : this->solverData.lastMaximizingPolicy;
//TODO: at this point, set policy to the one stored in the region.
invokeSolver(computeLowerBounds, policy);
/*
//TODO: (maybe) when a few parameters are mapped to another value, build a "nicer" scheduler and check whether it induces values that are more optimal.
//Get the set of parameters which are (according to the policy) always mapped to the same region boundary.
//First, collect all (relevant) parameters, i.e., the ones that are set to the lower or upper boundary.
@ -325,15 +325,16 @@ namespace storm {
}
// std::cout << "Used Approximation" << std::endl;
for (auto const& varcount : VarCount){
// if(varcount.second.first > 0 && varcount.second.second > 0){
// std::cout << " Variable " << varcount.first << " has been set to lower " << varcount.second.first << " times and to upper " << varcount.second.second << " times. (total: " << substitutionCount << ")" << std::endl;
// }
if(varcount.second.first > 0 && varcount.second.second > 0){
std::cout << " Variable " << varcount.first << " has been set to lower " << varcount.second.first << " times and to upper " << varcount.second.second << " times. (total: " << substitutionCount << ", " << (computeLowerBounds ? "MIN" : "MAX") << ")" << std::endl;
}
}
for (auto const& fixVar : fixedVariables){
//std::cout << " APPROXMODEL: variable " << fixVar.first << " is always mapped to " << fixVar.second << std::endl;
}
// std::cout << " Result is " << this->solverData.result[this->solverData.initialStateIndex] << std::endl;
*/
return this->solverData.result[this->solverData.initialStateIndex];
}
@ -415,6 +416,9 @@ namespace storm {
this->solverData.player1Goal.direction(), player2Goal.direction(),
this->solverData.lastPlayer1Policy, policy,
this->matrixData.targetChoices, (this->computeRewards ? storm::utility::infinity<double>() : storm::utility::zero<double>()));
// this->solverData.result = std::vector<double>(this->solverData.result.size(), 0.0);
// solver->solveGame(this->solverData.player1Goal.direction(), player2Goal.direction(), this->solverData.result, this->vectorData.vector);
}

32
src/utility/policyguessing.cpp

@ -37,7 +37,7 @@ namespace storm {
storm::storage::BitVector probGreater0States;
getInducedEquationSystem(solver, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States);
solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value, solver.getPrecision(), solver.getRelative());
solver.setPolicyTracking();
bool resultCorrect = false;
@ -52,7 +52,7 @@ namespace storm {
if(!resultCorrect){
//If the policy could not be fixed, it indicates that our guessed values were to high.
STORM_LOG_WARN("Policies could not be fixed. Restarting Gamesolver. ");
solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value, solver.getPrecision(), solver.getRelative());
//x = std::vector<ValueType>(x.size(), storm::utility::zero<ValueType>());
}
}
@ -107,7 +107,7 @@ namespace storm {
std::vector<ValueType> inducedB;
storm::storage::BitVector probGreater0States;
getInducedEquationSystem(solver, b, policy, targetChoices, inducedA, inducedB, probGreater0States);
solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value, solver.getPrecision(), solver.getRelative());
solver.setPolicyTracking();
bool resultCorrect = false;
@ -122,16 +122,13 @@ namespace storm {
if(!resultCorrect){
//If the policy could not be fixed, it indicates that our guessed values were to high.
STORM_LOG_WARN("Policy could not be fixed. Restarting MinMaxsolver." );
solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
//x = std::vector<ValueType>(x.size(), storm::utility::zero<ValueType>());
solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value, solver.getPrecision(), solver.getRelative());
}
}
}
template <typename ValueType>
void getInducedEquationSystem(storm::solver::GameSolver<ValueType> const& solver,
std::vector<ValueType> const& b,
@ -198,7 +195,9 @@ namespace storm {
std::vector<ValueType>& x,
std::vector<ValueType> const& b,
storm::storage::BitVector const& probGreater0States,
ValueType const& prob0Value
ValueType const& prob0Value,
ValueType const& precision,
bool relative
){
//Get the submatrix/subvector A,x, and b and invoke linear equation solver
storm::storage::SparseMatrix<ValueType> subA = A.getSubmatrix(true, probGreater0States, probGreater0States, true);
@ -210,6 +209,17 @@ namespace storm {
std::unique_ptr<storm::solver::LinearEquationSolver<ValueType>> linEqSysSolver = storm::utility::solver::LinearEquationSolverFactory<ValueType>().create(subA);
linEqSysSolver->solveEquationSystem(subX, subB);
//Performing a couple of iterations makes the result "stable" when doing value iteration, i.e.,
//if the given equation system is induced by an optimal policy, value iteration will terminate after the first iteration.
subA.convertToEquationSystem();
linEqSysSolver = storm::utility::solver::LinearEquationSolverFactory<ValueType>().create(subA);
std::vector<ValueType> copyX(subX.size());
std::size_t iterations = 100000; //don't run into an endless loop...
do {
linEqSysSolver->performMatrixVectorMultiplication(subX, &subB, 10, &copyX);
--iterations;
} while(!storm::utility::vector::equalModuloPrecision(subX, copyX, precision, relative) && iterations>0);
STORM_LOG_WARN_COND(iterations>0, "Iterations on result of linear equation solver did not converge.");
//fill in the result
storm::utility::vector::setVectorValues(x, probGreater0States, subX);
storm::utility::vector::setVectorValues(x, (~probGreater0States), prob0Value);
@ -238,7 +248,7 @@ namespace storm {
* 2. There is another choice that leads to target
* 3. The value of that choice is equal to the value of the choice given by the policy
* Note that the values of the result will not change this way.
* We do this unil the policy does not change anymore
* We do this until the policy does not change anymore
*/
policyChanged = false;
//Player 1:
@ -403,7 +413,9 @@ namespace storm {
std::vector<double>& x,
std::vector<double> const& b,
storm::storage::BitVector const& probGreater0States,
double const& prob0Value
double const& prob0Value,
double const& precision,
bool relative
);
template bool checkAndFixPolicy<double>(storm::solver::GameSolver<double> const& solver,

4
src/utility/policyguessing.h

@ -162,7 +162,9 @@ namespace storm {
std::vector<ValueType>& x,
std::vector<ValueType> const& b,
storm::storage::BitVector const& probGreater0States,
ValueType const& prob0Value
ValueType const& prob0Value,
ValueType const& precision,
bool relative
);
/*!

Loading…
Cancel
Save