From 5ec1e2acbed1f5f16e9e48c2f505b742ae209964 Mon Sep 17 00:00:00 2001
From: sjunges <sebastian.junges@rwth-aachen.de>
Date: Mon, 10 Aug 2015 19:25:03 +0200
Subject: [PATCH] PermissiveSchedulers are on future now

Former-commit-id: d49b7623e6a498bc5380208ddfbb87ca02d2c54f
---
 src/storage/PermissiveSchedulers.h | 232 +++++++++++++++++++++++++++++
 1 file changed, 232 insertions(+)
 create mode 100644 src/storage/PermissiveSchedulers.h

diff --git a/src/storage/PermissiveSchedulers.h b/src/storage/PermissiveSchedulers.h
new file mode 100644
index 000000000..a5d8b5574
--- /dev/null
+++ b/src/storage/PermissiveSchedulers.h
@@ -0,0 +1,232 @@
+
+#ifndef PERMISSIVESCHEDULERS_H
+#define	PERMISSIVESCHEDULERS_H
+
+#include <unordered_map>
+#include "expressions/Variable.h"
+#include "StateActionPair.h"
+#include "StateActionTargetTuple.h"
+
+namespace storm {
+    namespace storage {
+        class PermissiveSchedulerPenalties {
+            std::unordered_map<StateActionPair, double> mPenalties;
+            
+        public:
+            double get(uint_fast64_t state, uint_fast64_t action) const {
+                return get(StateActionPair(state, action));
+              
+            }
+            
+            
+            double get(StateActionPair const& sap) const {
+                auto it = mPenalties.find(sap);
+                if(it == mPenalties.end()) {
+                    return 1.0;
+                }
+                else {
+                    return it->second;
+                }
+            }
+            
+            void set(uint_fast64_t state, uint_fast64_t action, double penalty) {
+                assert(penalty >= 1.0);
+                if(penalty == 1.0) {
+                    auto it = mPenalties.find(std::make_pair(state, action));
+                    if(it != mPenalties.end()) {
+                        mPenalties.erase(it);
+                    }
+                } else {
+                    mPenalties.emplace(std::make_pair(state, action), penalty);
+                }
+            }
+            
+            void clear() {
+                mPenalties.clear();
+            }
+            
+        };
+        
+        class MilpPermissiveSchedulerComputation {
+        private:
+           
+            bool mCalledOptimizer = false;
+            storm::solver::LpSolver& solver;
+            std::shared_ptr<storm::models::sparse::Mdp<double>> mdp;
+            std::unordered_map<StateActionPair, storm::expressions::Variable> multistrategyVariables;
+            std::unordered_map<uint_fast64_t, storm::expressions::Variable> mProbVariables;
+            std::unordered_map<uint_fast64_t, storm::expressions::Variable> mAlphaVariables;
+            std::unordered_map<StateActionTarget, storm::expressions::Variable> mBetaVariables;
+            std::unordered_map<uint_fast64_t, storm::expressions::Variable> mGammaVariables;
+            BitVector const& mGoals;
+            BitVector const& mSinks;
+            
+        public:
+            
+            MilpPermissiveSchedulerComputation(storm::solver::LpSolver& milpsolver, std::shared_ptr<storm::models::sparse::Mdp<double>> mdp, BitVector const& goalstates, BitVector const& sinkstates)
+                : solver(milpsolver), mdp(mdp), mGoals(goalstates), mSinks(sinkstates)
+            {
+                
+            }
+            
+                    
+            void calculatePermissiveScheduler(double boundary, PermissiveSchedulerPenalties const& penalties, BitVector const& irrelevantStates = BitVector()) {
+                createMILP(boundary, penalties, irrelevantStates);
+                solver.optimize();
+                mCalledOptimizer = true;
+            }
+            
+            bool foundSolution() {
+                assert(mCalledOptimizer);
+                return !solver.isInfeasible();
+            }
+            
+            BitVector getAllowedStateActionPairs() const {
+                BitVector result(mdp->getNumberOfChoices(), true);
+                for(auto const& entry : multistrategyVariables) {
+                    if(!solver.getBinaryValue(entry.second)) {
+                        result.set(mdp->getNondeterministicChoiceIndices()[entry.first.getState()]+entry.first.getAction(), false);
+                    }
+                }
+                return result;
+            }
+            
+                    
+        private:
+            
+            /**
+             * 
+             */
+            void createVariables(PermissiveSchedulerPenalties const& penalties, BitVector const& relevantStates) {
+                // We need the unique initial state later, so we get that one before looping.
+                assert(mdp->getInitialStates().getNumberOfSetBits() == 1);
+                uint_fast64_t initialStateIndex = mdp->getInitialStates().getNextSetIndex(0);
+                    
+                storm::expressions::Variable var;
+                for(uint_fast64_t s : relevantStates) {
+                    // Create x_s variables
+                    // Notice that only the initial probability appears in the objective.
+                    if(s == initialStateIndex) {
+                        var = solver.addLowerBoundedContinuousVariable("x_" + std::to_string(s), 0.0, -1.0);
+                    } else {
+                        var = solver.addLowerBoundedContinuousVariable("x_" + std::to_string(s), 0.0);
+                    }
+                    mProbVariables[s] = var;
+                    // Create alpha_s variables
+                    var = solver.addBinaryVariable("alp_" + std::to_string(s));
+                    mAlphaVariables[s] = var;
+                    // Create gamma_s variables
+                    var = solver.addBoundedContinuousVariable("gam_" + std::to_string(s), 0.0, 1.0);
+                    mGammaVariables[s] = var;
+                    for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) {
+                        auto stateAndAction = StateActionPair(s,a);
+                        // Create y_(s,a) variables
+                        
+                        double penalty = penalties.get(stateAndAction);
+                        var = solver.addBinaryVariable("y_"  + std::to_string(s) + "_" + std::to_string(a), -penalty);
+                        multistrategyVariables[stateAndAction] = var;
+                        
+                        // Create beta_(s,a,t) variables
+                        // Iterate over successors of s via a.
+                        for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) {
+                            if(entry.getValue() != 0) {
+                                StateActionTarget sat = {s,a,entry.getColumn()};
+                                var = solver.addBinaryVariable("beta_" + to_string(sat));
+                                mBetaVariables[sat] = var;
+                            }
+                        }
+                                
+                    }
+                }
+            }
+            
+            
+            
+            void createConstraints(double boundary, storm::storage::BitVector const& relevantStates) {
+                // (5) and (7) are omitted on purpose (-- we currenty do not support controllability of actions -- )
+                
+                // (1)
+                assert(mdp->getInitialStates().getNumberOfSetBits() == 1);
+                uint_fast64_t initialStateIndex = mdp->getInitialStates().getNextSetIndex(0);
+                solver.addConstraint("c1", mProbVariables[initialStateIndex] >= solver.getConstant(boundary));
+                for(uint_fast64_t s : relevantStates) {
+                    std::string stateString =  std::to_string(s);
+                    storm::expressions::Expression expr;
+                    // (2)
+                    for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) {
+                        expr = expr + multistrategyVariables[StateActionPair(s,a)];
+                    }   
+                    solver.addConstraint("c2-" + stateString, solver.getConstant(1) <= expr);
+                    // (5)
+                    solver.addConstraint("c5-" + std::to_string(s), mProbVariables[s] <= mAlphaVariables[s]);
+               
+                    // (3) For the relevant states.
+                    for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) {
+                        std::string sastring(stateString + "_" + std::to_string(a));
+                        expr = storm::expressions::Expression();        
+                        for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) {
+                            if(entry.getValue() != 0 && relevantStates.get(entry.getColumn())) {
+                                expr = expr + solver.getConstant(entry.getValue()) * mProbVariables[entry.getColumn()];
+                            } else if (entry.getValue() != 0 && mGoals.get(entry.getColumn())) {
+                                expr = expr + solver.getConstant(entry.getValue());
+                            }
+                        }        
+                        solver.addConstraint("c3-" + sastring, mProbVariables[s] < (solver.getConstant(1) - multistrategyVariables[StateActionPair(s,a)]) + expr);
+                    }
+
+                    for(uint_fast64_t a = 0; a < mdp->getNumberOfChoices(s); ++a) {    
+                        // (6)
+                        std::string sastring(stateString + "_" + std::to_string(a));
+                        expr = storm::expressions::Expression();        
+                        for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) {
+                            if(entry.getValue() != 0) {
+                                StateActionTarget sat = {s,a,entry.getColumn()};
+                                expr = expr + mBetaVariables[sat];
+                            }
+                        }        
+                        solver.addConstraint("c6-" + sastring, multistrategyVariables[StateActionPair(s,a)] == (solver.getConstant(1) - mAlphaVariables[s]) + expr);
+                        
+                        for(auto const& entry : mdp->getTransitionMatrix().getRow(mdp->getNondeterministicChoiceIndices()[s]+a)) {
+                            if(entry.getValue() != 0) {
+                                StateActionTarget sat = {s,a,entry.getColumn()};
+                                std::string satstring = to_string(sat);
+                                // (8)
+                                solver.addConstraint("c8-" + satstring, mGammaVariables[entry.getColumn()] < mGammaVariables[s] + (solver.getConstant(1) - mBetaVariables[sat])); // With rewards, we have to change this.
+                            }
+                        }
+                                
+                    }
+                }
+            }
+                
+            
+            /**
+             *
+             */
+            void createMILP(double boundary, PermissiveSchedulerPenalties const& penalties, BitVector const& dontCareStates ) {
+                BitVector irrelevant = mGoals | mSinks;
+                BitVector relevantStates = ~irrelevant;
+                // Notice that the separated construction of variables and 
+                // constraints slows down the construction of the MILP. 
+                // In the future, we might want to merge this.
+                createVariables(penalties, relevantStates);
+                createConstraints(boundary, relevantStates);
+                
+                
+                solver.setModelSense(storm::solver::LpSolver::ModelSense::Minimize);
+                
+                
+            }
+            
+            
+            
+            
+            
+        };
+        
+    }
+}
+
+
+#endif	/* PERMISSIVESCHEDULERS_H */
+