Browse Source
PermissiveSchedulers are on future now
PermissiveSchedulers are on future now
Former-commit-id: d49b7623e6
tempestpy_adaptions
sjunges
10 years ago
1 changed files with 232 additions and 0 deletions
@ -0,0 +1,232 @@ |
|||
|
|||
#ifndef PERMISSIVESCHEDULERS_H |
|||
#define PERMISSIVESCHEDULERS_H |
|||
|
|||
#include <unordered_map> |
|||
#include "expressions/Variable.h" |
|||
#include "StateActionPair.h" |
|||
#include "StateActionTargetTuple.h" |
|||
|
|||
namespace storm { |
|||
namespace storage { |
|||
class PermissiveSchedulerPenalties { |
|||
std::unordered_map<StateActionPair, double> mPenalties; |
|||
|
|||
public: |
|||
double get(uint_fast64_t state, uint_fast64_t action) const { |
|||
return get(StateActionPair(state, action)); |
|||
|
|||
} |
|||
|
|||
|
|||
double get(StateActionPair const& sap) const { |
|||
auto it = mPenalties.find(sap); |
|||
if(it == mPenalties.end()) { |
|||
return 1.0; |
|||
} |
|||
else { |
|||
return it->second; |
|||
} |
|||
} |
|||
|
|||
void set(uint_fast64_t state, uint_fast64_t action, double penalty) { |
|||
assert(penalty >= 1.0); |
|||
if(penalty == 1.0) { |
|||
auto it = mPenalties.find(std::make_pair(state, action)); |
|||
if(it != mPenalties.end()) { |
|||
mPenalties.erase(it); |
|||
} |
|||
} else { |
|||
mPenalties.emplace(std::make_pair(state, action), penalty); |
|||
} |
|||
} |
|||
|
|||
void clear() { |
|||
mPenalties.clear(); |
|||
} |
|||
|
|||
}; |
|||
|
|||
class MilpPermissiveSchedulerComputation { |
|||
private: |
|||
|
|||
bool mCalledOptimizer = false; |
|||
storm::solver::LpSolver& solver; |
|||
std::shared_ptr<storm::models::sparse::Mdp<double>> mdp; |
|||
std::unordered_map<StateActionPair, storm::expressions::Variable> multistrategyVariables; |
|||
std::unordered_map<uint_fast64_t, storm::expressions::Variable> mProbVariables; |
|||
std::unordered_map<uint_fast64_t, storm::expressions::Variable> mAlphaVariables; |
|||
std::unordered_map<StateActionTarget, storm::expressions::Variable> mBetaVariables; |
|||
std::unordered_map<uint_fast64_t, storm::expressions::Variable> mGammaVariables; |
|||
BitVector const& mGoals; |
|||
BitVector const& mSinks; |
|||
|
|||
public: |
|||
|
|||
MilpPermissiveSchedulerComputation(storm::solver::LpSolver& milpsolver, std::shared_ptr<storm::models::sparse::Mdp<double>> mdp, BitVector const& goalstates, BitVector const& sinkstates) |
|||
: solver(milpsolver), mdp(mdp), mGoals(goalstates), mSinks(sinkstates) |
|||
{ |
|||
|
|||
} |
|||
|
|||
|
|||
void calculatePermissiveScheduler(double boundary, PermissiveSchedulerPenalties const& penalties, BitVector const& irrelevantStates = BitVector()) { |
|||
createMILP(boundary, penalties, irrelevantStates); |
|||
solver.optimize(); |
|||
mCalledOptimizer = true; |
|||
} |
|||
|
|||
bool foundSolution() { |
|||
assert(mCalledOptimizer); |
|||
return !solver.isInfeasible(); |
|||
} |
|||
|
|||
BitVector getAllowedStateActionPairs() const { |
|||
BitVector result(mdp->getNumberOfChoices(), true); |
|||
for(auto const& entry : multistrategyVariables) { |
|||
if(!solver.getBinaryValue(entry.second)) { |
|||
result.set(mdp->getNondeterministicChoiceIndices()[entry.first.getState()]+entry.first.getAction(), false); |
|||
} |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
|
|||
private: |
|||
|
|||
/** |
|||
* |
|||
*/ |
|||
void createVariables(PermissiveSchedulerPenalties const& penalties, BitVector const& relevantStates) { |
|||
// We need the unique initial state later, so we get that one before looping. |
|||
assert(mdp->getInitialStates().getNumberOfSetBits() == 1); |
|||
uint_fast64_t initialStateIndex = mdp->getInitialStates().getNextSetIndex(0); |
|||
|
|||
storm::expressions::Variable var; |
|||
for(uint_fast64_t s : relevantStates) { |
|||
// Create x_s variables |
|||
// Notice that only the initial probability appears in the objective. |
|||
if(s == initialStateIndex) { |
|||
var = solver.addLowerBoundedContinuousVariable("x_" + std::to_string(s), 0.0, -1.0); |
|||
} else { |
|||
var = solver.addLowerBoundedContinuousVariable("x_" + std::to_string(s), 0.0); |
|||
} |
|||
mProbVariables[s] = var; |
|||
// Create alpha_s variables |
|||
var = solver.addBinaryVariable("alp_" + std::to_string(s)); |
|||
mAlphaVariables[s] = var; |
|||
// Create gamma_s variables |
|||
var = solver.addBoundedContinuousVariable("gam_" + std::to_string(s), 0.0, 1.0); |
|||
mGammaVariables[s] = var; |
|||
for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) { |
|||
auto stateAndAction = StateActionPair(s,a); |
|||
// Create y_(s,a) variables |
|||
|
|||
double penalty = penalties.get(stateAndAction); |
|||
var = solver.addBinaryVariable("y_" + std::to_string(s) + "_" + std::to_string(a), -penalty); |
|||
multistrategyVariables[stateAndAction] = var; |
|||
|
|||
// Create beta_(s,a,t) variables |
|||
// Iterate over successors of s via a. |
|||
for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) { |
|||
if(entry.getValue() != 0) { |
|||
StateActionTarget sat = {s,a,entry.getColumn()}; |
|||
var = solver.addBinaryVariable("beta_" + to_string(sat)); |
|||
mBetaVariables[sat] = var; |
|||
} |
|||
} |
|||
|
|||
} |
|||
} |
|||
} |
|||
|
|||
|
|||
|
|||
void createConstraints(double boundary, storm::storage::BitVector const& relevantStates) { |
|||
// (5) and (7) are omitted on purpose (-- we currenty do not support controllability of actions -- ) |
|||
|
|||
// (1) |
|||
assert(mdp->getInitialStates().getNumberOfSetBits() == 1); |
|||
uint_fast64_t initialStateIndex = mdp->getInitialStates().getNextSetIndex(0); |
|||
solver.addConstraint("c1", mProbVariables[initialStateIndex] >= solver.getConstant(boundary)); |
|||
for(uint_fast64_t s : relevantStates) { |
|||
std::string stateString = std::to_string(s); |
|||
storm::expressions::Expression expr; |
|||
// (2) |
|||
for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) { |
|||
expr = expr + multistrategyVariables[StateActionPair(s,a)]; |
|||
} |
|||
solver.addConstraint("c2-" + stateString, solver.getConstant(1) <= expr); |
|||
// (5) |
|||
solver.addConstraint("c5-" + std::to_string(s), mProbVariables[s] <= mAlphaVariables[s]); |
|||
|
|||
// (3) For the relevant states. |
|||
for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) { |
|||
std::string sastring(stateString + "_" + std::to_string(a)); |
|||
expr = storm::expressions::Expression(); |
|||
for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) { |
|||
if(entry.getValue() != 0 && relevantStates.get(entry.getColumn())) { |
|||
expr = expr + solver.getConstant(entry.getValue()) * mProbVariables[entry.getColumn()]; |
|||
} else if (entry.getValue() != 0 && mGoals.get(entry.getColumn())) { |
|||
expr = expr + solver.getConstant(entry.getValue()); |
|||
} |
|||
} |
|||
solver.addConstraint("c3-" + sastring, mProbVariables[s] < (solver.getConstant(1) - multistrategyVariables[StateActionPair(s,a)]) + expr); |
|||
} |
|||
|
|||
for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) { |
|||
// (6) |
|||
std::string sastring(stateString + "_" + std::to_string(a)); |
|||
expr = storm::expressions::Expression(); |
|||
for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) { |
|||
if(entry.getValue() != 0) { |
|||
StateActionTarget sat = {s,a,entry.getColumn()}; |
|||
expr = expr + mBetaVariables[sat]; |
|||
} |
|||
} |
|||
solver.addConstraint("c6-" + sastring, multistrategyVariables[StateActionPair(s,a)] == (solver.getConstant(1) - mAlphaVariables[s]) + expr); |
|||
|
|||
for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) { |
|||
if(entry.getValue() != 0) { |
|||
StateActionTarget sat = {s,a,entry.getColumn()}; |
|||
std::string satstring = to_string(sat); |
|||
// (8) |
|||
solver.addConstraint("c8-" + satstring, mGammaVariables[entry.getColumn()] < mGammaVariables[s] + (solver.getConstant(1) - mBetaVariables[sat])); // With rewards, we have to change this. |
|||
} |
|||
} |
|||
|
|||
} |
|||
} |
|||
} |
|||
|
|||
|
|||
/** |
|||
* |
|||
*/ |
|||
void createMILP(double boundary, PermissiveSchedulerPenalties const& penalties, BitVector const& dontCareStates ) { |
|||
BitVector irrelevant = mGoals | mSinks; |
|||
BitVector relevantStates = ~irrelevant; |
|||
// Notice that the separated construction of variables and |
|||
// constraints slows down the construction of the MILP. |
|||
// In the future, we might want to merge this. |
|||
createVariables(penalties, relevantStates); |
|||
createConstraints(boundary, relevantStates); |
|||
|
|||
|
|||
solver.setModelSense(storm::solver::LpSolver::ModelSense::Minimize); |
|||
|
|||
|
|||
} |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
}; |
|||
|
|||
} |
|||
} |
|||
|
|||
|
|||
#endif /* PERMISSIVESCHEDULERS_H */ |
|||
|
Write
Preview
Loading…
Cancel
Save
Reference in new issue