PermissiveSchedulers are on future now

Former-commit-id: d49b7623e6
11 years ago · 5ec1e2acbe
1 changed files with 232 additions and 0 deletions
--- a/src/storage/PermissiveSchedulers.h
+++ b/src/storage/PermissiveSchedulers.h
@ -0,0 +1,232 @@
 #ifndef PERMISSIVESCHEDULERS_H
 #define	PERMISSIVESCHEDULERS_H
 #include <unordered_map>
 #include "expressions/Variable.h"
 #include "StateActionPair.h"
 #include "StateActionTargetTuple.h"
 namespace storm {
    namespace storage {
        class PermissiveSchedulerPenalties {
            std::unordered_map<StateActionPair, double> mPenalties;
        public:
            double get(uint_fast64_t state, uint_fast64_t action) const {
                return get(StateActionPair(state, action));
            }
            double get(StateActionPair const& sap) const {
                auto it = mPenalties.find(sap);
                if(it == mPenalties.end()) {
                    return 1.0;
                }
                else {
                    return it->second;
                }
            }
            void set(uint_fast64_t state, uint_fast64_t action, double penalty) {
                assert(penalty >= 1.0);
                if(penalty == 1.0) {
                    auto it = mPenalties.find(std::make_pair(state, action));
                    if(it != mPenalties.end()) {
                        mPenalties.erase(it);
                    }
                } else {
                    mPenalties.emplace(std::make_pair(state, action), penalty);
                }
            }
            void clear() {
                mPenalties.clear();
            }
        };
        class MilpPermissiveSchedulerComputation {
        private:
            bool mCalledOptimizer = false;
            storm::solver::LpSolver& solver;
            std::shared_ptr<storm::models::sparse::Mdp<double>> mdp;
            std::unordered_map<StateActionPair, storm::expressions::Variable> multistrategyVariables;
            std::unordered_map<uint_fast64_t, storm::expressions::Variable> mProbVariables;
            std::unordered_map<uint_fast64_t, storm::expressions::Variable> mAlphaVariables;
            std::unordered_map<StateActionTarget, storm::expressions::Variable> mBetaVariables;
            std::unordered_map<uint_fast64_t, storm::expressions::Variable> mGammaVariables;
            BitVector const& mGoals;
            BitVector const& mSinks;
        public:
            MilpPermissiveSchedulerComputation(storm::solver::LpSolver& milpsolver, std::shared_ptr<storm::models::sparse::Mdp<double>> mdp, BitVector const& goalstates, BitVector const& sinkstates)
                : solver(milpsolver), mdp(mdp), mGoals(goalstates), mSinks(sinkstates)
            {
            }
            void calculatePermissiveScheduler(double boundary, PermissiveSchedulerPenalties const& penalties, BitVector const& irrelevantStates = BitVector()) {
                createMILP(boundary, penalties, irrelevantStates);
                solver.optimize();
                mCalledOptimizer = true;
            }
            bool foundSolution() {
                assert(mCalledOptimizer);
                return !solver.isInfeasible();
            }
            BitVector getAllowedStateActionPairs() const {
                BitVector result(mdp->getNumberOfChoices(), true);
                for(auto const& entry : multistrategyVariables) {
                    if(!solver.getBinaryValue(entry.second)) {
                        result.set(mdp->getNondeterministicChoiceIndices()[entry.first.getState()]+entry.first.getAction(), false);
                    }
                }
                return result;
            }
        private:
            /**
             * 
             */
            void createVariables(PermissiveSchedulerPenalties const& penalties, BitVector const& relevantStates) {
                // We need the unique initial state later, so we get that one before looping.
                assert(mdp->getInitialStates().getNumberOfSetBits() == 1);
                uint_fast64_t initialStateIndex = mdp->getInitialStates().getNextSetIndex(0);
                storm::expressions::Variable var;
                for(uint_fast64_t s : relevantStates) {
                    // Create x_s variables
                    // Notice that only the initial probability appears in the objective.
                    if(s == initialStateIndex) {
                        var = solver.addLowerBoundedContinuousVariable("x_" + std::to_string(s), 0.0, -1.0);
                    } else {
                        var = solver.addLowerBoundedContinuousVariable("x_" + std::to_string(s), 0.0);
                    }
                    mProbVariables[s] = var;
                    // Create alpha_s variables
                    var = solver.addBinaryVariable("alp_" + std::to_string(s));
                    mAlphaVariables[s] = var;
                    // Create gamma_s variables
                    var = solver.addBoundedContinuousVariable("gam_" + std::to_string(s), 0.0, 1.0);
                    mGammaVariables[s] = var;
                    for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) {
                        auto stateAndAction = StateActionPair(s,a);
                        // Create y_(s,a) variables
                        double penalty = penalties.get(stateAndAction);
                        var = solver.addBinaryVariable("y_"  + std::to_string(s) + "_" + std::to_string(a), -penalty);
                        multistrategyVariables[stateAndAction] = var;
                        // Create beta_(s,a,t) variables
                        // Iterate over successors of s via a.
                        for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) {
                            if(entry.getValue() != 0) {
                                StateActionTarget sat = {s,a,entry.getColumn()};
                                var = solver.addBinaryVariable("beta_" + to_string(sat));
                                mBetaVariables[sat] = var;
                            }
                        }
                    }
                }
            }
            void createConstraints(double boundary, storm::storage::BitVector const& relevantStates) {
                // (5) and (7) are omitted on purpose (-- we currenty do not support controllability of actions -- )
                // (1)
                assert(mdp->getInitialStates().getNumberOfSetBits() == 1);
                uint_fast64_t initialStateIndex = mdp->getInitialStates().getNextSetIndex(0);
                solver.addConstraint("c1", mProbVariables[initialStateIndex] >= solver.getConstant(boundary));
                for(uint_fast64_t s : relevantStates) {
                    std::string stateString =  std::to_string(s);
                    storm::expressions::Expression expr;
                    // (2)
                    for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) {
                        expr = expr + multistrategyVariables[StateActionPair(s,a)];
                    }   
                    solver.addConstraint("c2-" + stateString, solver.getConstant(1) <= expr);
                    // (5)
                    solver.addConstraint("c5-" + std::to_string(s), mProbVariables[s] <= mAlphaVariables[s]);
                    // (3) For the relevant states.
                    for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) {
                        std::string sastring(stateString + "_" + std::to_string(a));
                        expr = storm::expressions::Expression();        
                        for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) {
                            if(entry.getValue() != 0 && relevantStates.get(entry.getColumn())) {
                                expr = expr + solver.getConstant(entry.getValue()) * mProbVariables[entry.getColumn()];
                            } else if (entry.getValue() != 0 && mGoals.get(entry.getColumn())) {
                                expr = expr + solver.getConstant(entry.getValue());
                            }
                        }        
                        solver.addConstraint("c3-" + sastring, mProbVariables[s] < (solver.getConstant(1) - multistrategyVariables[StateActionPair(s,a)]) + expr);
                    }
                    for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) {    
                        // (6)
                        std::string sastring(stateString + "_" + std::to_string(a));
                        expr = storm::expressions::Expression();        
                        for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) {
                            if(entry.getValue() != 0) {
                                StateActionTarget sat = {s,a,entry.getColumn()};
                                expr = expr + mBetaVariables[sat];
                            }
                        }        
                        solver.addConstraint("c6-" + sastring, multistrategyVariables[StateActionPair(s,a)] == (solver.getConstant(1) - mAlphaVariables[s]) + expr);
                        for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) {
                            if(entry.getValue() != 0) {
                                StateActionTarget sat = {s,a,entry.getColumn()};
                                std::string satstring = to_string(sat);
                                // (8)
                                solver.addConstraint("c8-" + satstring, mGammaVariables[entry.getColumn()] < mGammaVariables[s] + (solver.getConstant(1) - mBetaVariables[sat])); // With rewards, we have to change this.
                            }
                        }
                    }
                }
            }
            /**
             *
             */
            void createMILP(double boundary, PermissiveSchedulerPenalties const& penalties, BitVector const& dontCareStates ) {
                BitVector irrelevant = mGoals | mSinks;
                BitVector relevantStates = ~irrelevant;
                // Notice that the separated construction of variables and 
                // constraints slows down the construction of the MILP. 
                // In the future, we might want to merge this.
                createVariables(penalties, relevantStates);
                createConstraints(boundary, relevantStates);
                solver.setModelSense(storm::solver::LpSolver::ModelSense::Minimize);
            }
        };
    }
 }
 #endif	/* PERMISSIVESCHEDULERS_H */