From f3f3a31e1e693d4e5ba90413a6ab93c20e4b6e7f Mon Sep 17 00:00:00 2001
From: TimQu <tim.quatmann@rwth-aachen.de>
Date: Sun, 22 Nov 2015 21:25:09 +0100
Subject: [PATCH] Optimization for policy recycling

Former-commit-id: b66a74b746adf0ec84502e632decf6385596fde3
---
 .../region/ApproximationModel.cpp             | 12 ++++---
 src/solver/GameSolver.cpp                     |  2 +-
 src/utility/policyguessing.cpp                | 34 +++++++++++++------
 src/utility/policyguessing.h                  |  4 ++-
 4 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/src/modelchecker/region/ApproximationModel.cpp b/src/modelchecker/region/ApproximationModel.cpp
index 9da24bdf7..db500cec6 100644
--- a/src/modelchecker/region/ApproximationModel.cpp
+++ b/src/modelchecker/region/ApproximationModel.cpp
@@ -283,7 +283,7 @@ namespace storm {
                 Policy& policy = computeLowerBounds ? this->solverData.lastMinimizingPolicy : this->solverData.lastMaximizingPolicy;
                 //TODO: at this point, set policy to the one stored in the region.
                 invokeSolver(computeLowerBounds, policy);
-                
+    /*
                 //TODO: (maybe) when a few parameters are mapped to another value, build a "nicer" scheduler and check whether it induces values that are more optimal.
                 //Get the set of parameters which are (according to the policy) always mapped to the same region boundary.
                 //First, collect all (relevant) parameters, i.e., the ones that are set to the lower or upper boundary.
@@ -325,15 +325,16 @@ namespace storm {
                 }
        //         std::cout << "Used Approximation" << std::endl;
                 for (auto const& varcount : VarCount){
-                  //  if(varcount.second.first > 0 && varcount.second.second > 0){
-          //              std::cout << "  Variable " << varcount.first << " has been set to lower " << varcount.second.first << " times and to upper " << varcount.second.second << " times. (total: " << substitutionCount << ")" << std::endl;
-                 //   }
+                    if(varcount.second.first > 0 && varcount.second.second > 0){
+                        std::cout << "  Variable " << varcount.first << " has been set to lower " << varcount.second.first << " times and to upper " << varcount.second.second << " times. (total: " << substitutionCount << ", " << (computeLowerBounds ? "MIN" : "MAX") << ")" << std::endl;
+                    }
                 }
                 for (auto const& fixVar : fixedVariables){
                     //std::cout << "  APPROXMODEL: variable " << fixVar.first << " is always mapped to " << fixVar.second << std::endl;
                 }
                     
         //        std::cout << "    Result is " << this->solverData.result[this->solverData.initialStateIndex] << std::endl;
+     */
                 return this->solverData.result[this->solverData.initialStateIndex];
             }
             
@@ -415,6 +416,9 @@ namespace storm {
                                                           this->solverData.player1Goal.direction(), player2Goal.direction(), 
                                                           this->solverData.lastPlayer1Policy, policy,
                                                           this->matrixData.targetChoices, (this->computeRewards ? storm::utility::infinity<double>() : storm::utility::zero<double>()));
+                
+               // this->solverData.result = std::vector<double>(this->solverData.result.size(), 0.0);
+               // solver->solveGame(this->solverData.player1Goal.direction(), player2Goal.direction(), this->solverData.result, this->vectorData.vector);
             }
             
                 
diff --git a/src/solver/GameSolver.cpp b/src/solver/GameSolver.cpp
index 516cfa48c..9d94149d0 100644
--- a/src/solver/GameSolver.cpp
+++ b/src/solver/GameSolver.cpp
@@ -71,7 +71,7 @@ namespace storm {
                 ++iterations;
             } while (!converged && iterations < maximalNumberOfIterations);
             
-            STORM_LOG_WARN_COND(converged, "Iterative solver for stochastic two player games did not converge after " << iterations << "iterations.");
+            STORM_LOG_WARN_COND(converged, "Iterative solver for stochastic two player games did not converge after " << iterations << " iterations.");
             
             if(this->trackPolicies){
                 this->player2Policy = std::vector<storm::storage::sparse::state_type>(player2Matrix.getRowGroupCount());
diff --git a/src/utility/policyguessing.cpp b/src/utility/policyguessing.cpp
index cfd471111..f21e4577b 100644
--- a/src/utility/policyguessing.cpp
+++ b/src/utility/policyguessing.cpp
@@ -37,7 +37,7 @@ namespace storm {
                 storm::storage::BitVector probGreater0States;
                 getInducedEquationSystem(solver, b, player1Policy, player2Policy, targetChoices, inducedA, inducedB, probGreater0States);
                 
-                solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
+                solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value, solver.getPrecision(), solver.getRelative());
                 
                 solver.setPolicyTracking();
                 bool resultCorrect = false;
@@ -52,7 +52,7 @@ namespace storm {
                     if(!resultCorrect){
                         //If the policy could not be fixed, it indicates that our guessed values were to high.
                         STORM_LOG_WARN("Policies could not be fixed. Restarting Gamesolver. ");
-                        solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
+                        solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value, solver.getPrecision(), solver.getRelative());
                         //x = std::vector<ValueType>(x.size(), storm::utility::zero<ValueType>());
                     }
                 }
@@ -107,7 +107,7 @@ namespace storm {
                 std::vector<ValueType> inducedB;
                 storm::storage::BitVector probGreater0States;
                 getInducedEquationSystem(solver, b, policy, targetChoices, inducedA, inducedB, probGreater0States);
-                solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
+                solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value, solver.getPrecision(), solver.getRelative());
                 
                 solver.setPolicyTracking();
                 bool resultCorrect = false;
@@ -122,16 +122,13 @@ namespace storm {
                     if(!resultCorrect){
                         //If the policy could not be fixed, it indicates that our guessed values were to high.
                         STORM_LOG_WARN("Policy could not be fixed. Restarting MinMaxsolver." );
-                        solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value);
-                        //x = std::vector<ValueType>(x.size(), storm::utility::zero<ValueType>());
+                        solveLinearEquationSystem(inducedA, x, inducedB, probGreater0States, prob0Value, solver.getPrecision(), solver.getRelative());
                     }
                 }
                 
                 
             }
             
-            
-            
             template <typename ValueType>
             void getInducedEquationSystem(storm::solver::GameSolver<ValueType> const& solver,
                                             std::vector<ValueType> const& b,
@@ -198,7 +195,9 @@ namespace storm {
                                            std::vector<ValueType>& x,
                                            std::vector<ValueType> const& b,
                                            storm::storage::BitVector const& probGreater0States,
-                                           ValueType const& prob0Value
+                                           ValueType const& prob0Value,
+                                           ValueType const& precision,
+                                           bool relative
                                         ){
                 //Get the submatrix/subvector A,x, and b and invoke linear equation solver
                 storm::storage::SparseMatrix<ValueType> subA = A.getSubmatrix(true, probGreater0States, probGreater0States, true);
@@ -209,7 +208,18 @@ namespace storm {
                 storm::utility::vector::selectVectorValues(subB, probGreater0States, b);
                 std::unique_ptr<storm::solver::LinearEquationSolver<ValueType>> linEqSysSolver = storm::utility::solver::LinearEquationSolverFactory<ValueType>().create(subA);
                 linEqSysSolver->solveEquationSystem(subX, subB);
-            
+                
+                //Performing a couple of iterations makes the result "stable" when doing value iteration, i.e.,
+                //if the given equation system is induced by an optimal policy, value iteration will terminate after the first iteration.
+                subA.convertToEquationSystem();
+                linEqSysSolver = storm::utility::solver::LinearEquationSolverFactory<ValueType>().create(subA);
+                std::vector<ValueType> copyX(subX.size());
+                std::size_t iterations = 100000; //don't run into an endless loop...
+                do {
+                    linEqSysSolver->performMatrixVectorMultiplication(subX, &subB, 10, &copyX);
+                    --iterations;
+                } while(!storm::utility::vector::equalModuloPrecision(subX, copyX, precision, relative) && iterations>0);
+                STORM_LOG_WARN_COND(iterations>0, "Iterations on result of linear equation solver did not converge.");
                 //fill in the result
                 storm::utility::vector::setVectorValues(x, probGreater0States, subX);
                 storm::utility::vector::setVectorValues(x, (~probGreater0States), prob0Value);
@@ -238,7 +248,7 @@ namespace storm {
                      * 2. There is another choice that leads to target
                      * 3. The value of that choice is equal to the value of the choice given by the policy
                      * Note that the values of the result will not change this way.
-                     * We do this unil the policy does not change anymore
+                     * We do this until the policy does not change anymore
                      */
                     policyChanged = false;
                     //Player 1:
@@ -403,7 +413,9 @@ namespace storm {
                                            std::vector<double>& x,
                                            std::vector<double> const& b,
                                            storm::storage::BitVector const& probGreater0States,
-                                           double const& prob0Value
+                                           double const& prob0Value,
+                                           double const& precision,
+                                           bool relative
                             );
             
             template bool checkAndFixPolicy<double>(storm::solver::GameSolver<double> const& solver,
diff --git a/src/utility/policyguessing.h b/src/utility/policyguessing.h
index 5c67c4271..0dd92d6b5 100644
--- a/src/utility/policyguessing.h
+++ b/src/utility/policyguessing.h
@@ -162,7 +162,9 @@ namespace storm {
                                            std::vector<ValueType>& x,
                                            std::vector<ValueType> const& b,
                                            storm::storage::BitVector const& probGreater0States,
-                                           ValueType const& prob0Value
+                                           ValueType const& prob0Value,
+                                           ValueType const& precision,
+                                           bool relative
                                         );
             
             /*!