You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
232 lines
11 KiB
232 lines
11 KiB
|
|
#ifndef PERMISSIVESCHEDULERS_H
|
|
#define PERMISSIVESCHEDULERS_H
|
|
|
|
#include <unordered_map>
|
|
#include "expressions/Variable.h"
|
|
#include "StateActionPair.h"
|
|
#include "StateActionTargetTuple.h"
|
|
|
|
namespace storm {
|
|
namespace storage {
|
|
class PermissiveSchedulerPenalties {
|
|
std::unordered_map<StateActionPair, double> mPenalties;
|
|
|
|
public:
|
|
double get(uint_fast64_t state, uint_fast64_t action) const {
|
|
return get(StateActionPair(state, action));
|
|
|
|
}
|
|
|
|
|
|
double get(StateActionPair const& sap) const {
|
|
auto it = mPenalties.find(sap);
|
|
if(it == mPenalties.end()) {
|
|
return 1.0;
|
|
}
|
|
else {
|
|
return it->second;
|
|
}
|
|
}
|
|
|
|
void set(uint_fast64_t state, uint_fast64_t action, double penalty) {
|
|
assert(penalty >= 1.0);
|
|
if(penalty == 1.0) {
|
|
auto it = mPenalties.find(std::make_pair(state, action));
|
|
if(it != mPenalties.end()) {
|
|
mPenalties.erase(it);
|
|
}
|
|
} else {
|
|
mPenalties.emplace(std::make_pair(state, action), penalty);
|
|
}
|
|
}
|
|
|
|
void clear() {
|
|
mPenalties.clear();
|
|
}
|
|
|
|
};
|
|
|
|
class MilpPermissiveSchedulerComputation {
|
|
private:
|
|
|
|
bool mCalledOptimizer = false;
|
|
storm::solver::LpSolver& solver;
|
|
std::shared_ptr<storm::models::sparse::Mdp<double>> mdp;
|
|
std::unordered_map<StateActionPair, storm::expressions::Variable> multistrategyVariables;
|
|
std::unordered_map<uint_fast64_t, storm::expressions::Variable> mProbVariables;
|
|
std::unordered_map<uint_fast64_t, storm::expressions::Variable> mAlphaVariables;
|
|
std::unordered_map<StateActionTarget, storm::expressions::Variable> mBetaVariables;
|
|
std::unordered_map<uint_fast64_t, storm::expressions::Variable> mGammaVariables;
|
|
BitVector const& mGoals;
|
|
BitVector const& mSinks;
|
|
|
|
public:
|
|
|
|
MilpPermissiveSchedulerComputation(storm::solver::LpSolver& milpsolver, std::shared_ptr<storm::models::sparse::Mdp<double>> mdp, BitVector const& goalstates, BitVector const& sinkstates)
|
|
: solver(milpsolver), mdp(mdp), mGoals(goalstates), mSinks(sinkstates)
|
|
{
|
|
|
|
}
|
|
|
|
|
|
void calculatePermissiveScheduler(double boundary, PermissiveSchedulerPenalties const& penalties, BitVector const& irrelevantStates = BitVector()) {
|
|
createMILP(boundary, penalties, irrelevantStates);
|
|
solver.optimize();
|
|
mCalledOptimizer = true;
|
|
}
|
|
|
|
bool foundSolution() {
|
|
assert(mCalledOptimizer);
|
|
return !solver.isInfeasible();
|
|
}
|
|
|
|
BitVector getAllowedStateActionPairs() const {
|
|
BitVector result(mdp->getNumberOfChoices(), true);
|
|
for(auto const& entry : multistrategyVariables) {
|
|
if(!solver.getBinaryValue(entry.second)) {
|
|
result.set(mdp->getNondeterministicChoiceIndices()[entry.first.getState()]+entry.first.getAction(), false);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
private:
|
|
|
|
/**
|
|
*
|
|
*/
|
|
void createVariables(PermissiveSchedulerPenalties const& penalties, BitVector const& relevantStates) {
|
|
// We need the unique initial state later, so we get that one before looping.
|
|
assert(mdp->getInitialStates().getNumberOfSetBits() == 1);
|
|
uint_fast64_t initialStateIndex = mdp->getInitialStates().getNextSetIndex(0);
|
|
|
|
storm::expressions::Variable var;
|
|
for(uint_fast64_t s : relevantStates) {
|
|
// Create x_s variables
|
|
// Notice that only the initial probability appears in the objective.
|
|
if(s == initialStateIndex) {
|
|
var = solver.addLowerBoundedContinuousVariable("x_" + std::to_string(s), 0.0, -1.0);
|
|
} else {
|
|
var = solver.addLowerBoundedContinuousVariable("x_" + std::to_string(s), 0.0);
|
|
}
|
|
mProbVariables[s] = var;
|
|
// Create alpha_s variables
|
|
var = solver.addBinaryVariable("alp_" + std::to_string(s));
|
|
mAlphaVariables[s] = var;
|
|
// Create gamma_s variables
|
|
var = solver.addBoundedContinuousVariable("gam_" + std::to_string(s), 0.0, 1.0);
|
|
mGammaVariables[s] = var;
|
|
for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) {
|
|
auto stateAndAction = StateActionPair(s,a);
|
|
// Create y_(s,a) variables
|
|
|
|
double penalty = penalties.get(stateAndAction);
|
|
var = solver.addBinaryVariable("y_" + std::to_string(s) + "_" + std::to_string(a), -penalty);
|
|
multistrategyVariables[stateAndAction] = var;
|
|
|
|
// Create beta_(s,a,t) variables
|
|
// Iterate over successors of s via a.
|
|
for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) {
|
|
if(entry.getValue() != 0) {
|
|
StateActionTarget sat = {s,a,entry.getColumn()};
|
|
var = solver.addBinaryVariable("beta_" + to_string(sat));
|
|
mBetaVariables[sat] = var;
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void createConstraints(double boundary, storm::storage::BitVector const& relevantStates) {
|
|
// (5) and (7) are omitted on purpose (-- we currenty do not support controllability of actions -- )
|
|
|
|
// (1)
|
|
assert(mdp->getInitialStates().getNumberOfSetBits() == 1);
|
|
uint_fast64_t initialStateIndex = mdp->getInitialStates().getNextSetIndex(0);
|
|
solver.addConstraint("c1", mProbVariables[initialStateIndex] >= solver.getConstant(boundary));
|
|
for(uint_fast64_t s : relevantStates) {
|
|
std::string stateString = std::to_string(s);
|
|
storm::expressions::Expression expr;
|
|
// (2)
|
|
for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) {
|
|
expr = expr + multistrategyVariables[StateActionPair(s,a)];
|
|
}
|
|
solver.addConstraint("c2-" + stateString, solver.getConstant(1) <= expr);
|
|
// (5)
|
|
solver.addConstraint("c5-" + std::to_string(s), mProbVariables[s] <= mAlphaVariables[s]);
|
|
|
|
// (3) For the relevant states.
|
|
for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) {
|
|
std::string sastring(stateString + "_" + std::to_string(a));
|
|
expr = storm::expressions::Expression();
|
|
for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) {
|
|
if(entry.getValue() != 0 && relevantStates.get(entry.getColumn())) {
|
|
expr = expr + solver.getConstant(entry.getValue()) * mProbVariables[entry.getColumn()];
|
|
} else if (entry.getValue() != 0 && mGoals.get(entry.getColumn())) {
|
|
expr = expr + solver.getConstant(entry.getValue());
|
|
}
|
|
}
|
|
solver.addConstraint("c3-" + sastring, mProbVariables[s] < (solver.getConstant(1) - multistrategyVariables[StateActionPair(s,a)]) + expr);
|
|
}
|
|
|
|
for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) {
|
|
// (6)
|
|
std::string sastring(stateString + "_" + std::to_string(a));
|
|
expr = storm::expressions::Expression();
|
|
for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) {
|
|
if(entry.getValue() != 0) {
|
|
StateActionTarget sat = {s,a,entry.getColumn()};
|
|
expr = expr + mBetaVariables[sat];
|
|
}
|
|
}
|
|
solver.addConstraint("c6-" + sastring, multistrategyVariables[StateActionPair(s,a)] == (solver.getConstant(1) - mAlphaVariables[s]) + expr);
|
|
|
|
for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) {
|
|
if(entry.getValue() != 0) {
|
|
StateActionTarget sat = {s,a,entry.getColumn()};
|
|
std::string satstring = to_string(sat);
|
|
// (8)
|
|
solver.addConstraint("c8-" + satstring, mGammaVariables[entry.getColumn()] < mGammaVariables[s] + (solver.getConstant(1) - mBetaVariables[sat])); // With rewards, we have to change this.
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
*/
|
|
void createMILP(double boundary, PermissiveSchedulerPenalties const& penalties, BitVector const& dontCareStates ) {
|
|
BitVector irrelevant = mGoals | mSinks;
|
|
BitVector relevantStates = ~irrelevant;
|
|
// Notice that the separated construction of variables and
|
|
// constraints slows down the construction of the MILP.
|
|
// In the future, we might want to merge this.
|
|
createVariables(penalties, relevantStates);
|
|
createConstraints(boundary, relevantStates);
|
|
|
|
|
|
solver.setModelSense(storm::solver::LpSolver::ModelSense::Minimize);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
};
|
|
|
|
}
|
|
}
|
|
|
|
|
|
#endif /* PERMISSIVESCHEDULERS_H */
|
|
|