From c674de5893b5d7d19c667e4079c573490d005afc Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 11 Aug 2020 16:53:05 +0200
Subject: [PATCH] Deterministic infinite horizon: Added gain-bias and
 lra-distribution based solution methods

---
 ...arseDeterministicInfiniteHorizonHelper.cpp | 260 +++++++++++++++++-
 ...SparseDeterministicInfiniteHorizonHelper.h |  12 +-
 2 files changed, 259 insertions(+), 13 deletions(-)
diff --git a/src/storm/modelchecker/helper/infinitehorizon/SparseDeterministicInfiniteHorizonHelper.cpp b/src/storm/modelchecker/helper/infinitehorizon/SparseDeterministicInfiniteHorizonHelper.cpp
index d6dc8eb3c..f7327d8c0 100644
--- a/src/storm/modelchecker/helper/infinitehorizon/SparseDeterministicInfiniteHorizonHelper.cpp
+++ b/src/storm/modelchecker/helper/infinitehorizon/SparseDeterministicInfiniteHorizonHelper.cpp
@@ -17,9 +17,10 @@
 #include "storm/utility/vector.h"
 
 #include "storm/environment/solver/LongRunAverageSolverEnvironment.h"
-#include "storm/environment/solver/MinMaxSolverEnvironment.h"
+#include "storm/environment/solver/TopologicalSolverEnvironment.h"
 
 #include "storm/exceptions/UnmetRequirementException.h"
+#include "storm/exceptions/NotSupportedException.h"
 
 namespace storm {
     namespace modelchecker {
@@ -46,12 +47,12 @@ namespace storm {
             }
 
             template <typename ValueType>
-            ValueType SparseDeterministicInfiniteHorizonHelper<ValueType>::computeLraForComponent(Environment const& env, ValueGetter const& stateRewardsGetter, ValueGetter const& actionRewardsGetter, storm::storage::StronglyConnectedComponent const& component) {
+            ValueType SparseDeterministicInfiniteHorizonHelper<ValueType>::computeLraForComponent(Environment const& env, ValueGetter const& stateValueGetter, ValueGetter const& actionValueGetter, storm::storage::StronglyConnectedComponent const& component) {
                 // For deterministic models, we compute the LRA for a BSCC
                 
                 STORM_LOG_ASSERT(!this->isProduceSchedulerSet(), "Scheduler production enabled for deterministic model.");
                 
-                auto trivialResult = computeLraForTrivialBscc(env, stateRewardsGetter, actionRewardsGetter, component);
+                auto trivialResult = computeLraForTrivialBscc(env, stateValueGetter, actionValueGetter, component);
                 if (trivialResult.first) {
                     return trivialResult.second;
                 }
@@ -67,18 +68,18 @@ namespace storm {
                 }
                 STORM_LOG_TRACE("Computing LRA for BSCC of size " << component.size() << " using '" << storm::solver::toString(method) << "'.");
                 if (method == storm::solver::LraMethod::ValueIteration) {
-                    return computeLraForBsccVi(env, stateRewardsGetter, actionRewardsGetter, component);
-                }/* else if (method == storm::solver::LraMethod::LraDistributionEquations) {
+                    return computeLraForBsccVi(env, stateValueGetter, actionValueGetter, component);
+                } else if (method == storm::solver::LraMethod::LraDistributionEquations) {
                     // We only need the first element of the pair as the lra distribution is not relevant at this point.
-                    return computeLongRunAveragesForBsccLraDistr<ValueType>(env, bscc, rateMatrix, valueGetter, exitRateVector).first;
+                    return computeLraForBsccLraDistr(env, stateValueGetter, actionValueGetter, component).first;
                 }
                 STORM_LOG_WARN_COND(method == storm::solver::LraMethod::GainBiasEquations, "Unsupported lra method selected. Defaulting to " << storm::solver::toString(storm::solver::LraMethod::GainBiasEquations) << ".");
                 // We don't need the bias values
-                return computeLongRunAveragesForBsccGainBias<ValueType>(env, bscc, rateMatrix, valueGetter, exitRateVector).first;*/
+                return computeLraForBsccGainBias(env, stateValueGetter, actionValueGetter, component).first;
             }
             
             template <typename ValueType>
-            std::pair<bool, ValueType> SparseDeterministicInfiniteHorizonHelper<ValueType>::computeLraForTrivialBscc(Environment const& env, ValueGetter const& stateRewardsGetter, ValueGetter const& actionRewardsGetter, storm::storage::StronglyConnectedComponent const& component) {
+            std::pair<bool, ValueType> SparseDeterministicInfiniteHorizonHelper<ValueType>::computeLraForTrivialBscc(Environment const& env, ValueGetter const& stateValueGetter, ValueGetter const& actionValueGetter, storm::storage::StronglyConnectedComponent const& component) {
                 
                 // For deterministic models, we can catch the case where all values are the same. This includes the special case where the BSCC consist only of just one state.
                 bool first = true;
@@ -86,8 +87,9 @@ namespace storm {
                 for (auto const& element : component) {
                     auto state = internal::getComponentElementState(element);
                     STORM_LOG_ASSERT(state == *internal::getComponentElementChoicesBegin(element), "Unexpected choice index at state " << state << " of deterministic model.");
-                    ValueType curr = stateRewardsGetter(state) + (this->isContinuousTime() ? (*this->_exitRates)[state] * actionRewardsGetter(state) : actionRewardsGetter(state));
+                    ValueType curr = stateValueGetter(state) + (this->isContinuousTime() ? (*this->_exitRates)[state] * actionValueGetter(state) : actionValueGetter(state));
                     if (first) {
+                        val = curr;
                         first = false;
                     } else if (val != curr) {
                         return {false, storm::utility::zero<ValueType>()};
@@ -98,8 +100,12 @@ namespace storm {
             }
             
     
+            template <>
+            storm::RationalFunction SparseDeterministicInfiniteHorizonHelper<storm::RationalFunction>::computeLraForBsccVi(Environment const& env, ValueGetter const& stateValueGetter, ValueGetter const& actionValueGetter, storm::storage::StronglyConnectedComponent const& bscc) {
+                STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "The requested Method for LRA computation is not supported for parametric models.");
+            }
             template <typename ValueType>
-            ValueType SparseDeterministicInfiniteHorizonHelper<ValueType>::computeLraForBsccVi(Environment const& env, ValueGetter const& stateRewardsGetter, ValueGetter const& actionRewardsGetter, storm::storage::StronglyConnectedComponent const& bscc) {
+            ValueType SparseDeterministicInfiniteHorizonHelper<ValueType>::computeLraForBsccVi(Environment const& env, ValueGetter const& stateValueGetter, ValueGetter const& actionValueGetter, storm::storage::StronglyConnectedComponent const& bscc) {
 
                 // Collect parameters of the computation
                 ValueType aperiodicFactor = storm::utility::convertNumber<ValueType>(env.solver().lra().getAperiodicFactor());
@@ -108,14 +114,243 @@ namespace storm {
                 if (this->isContinuousTime()) {
                     // We assume a CTMC (with deterministic timed states and no instant states)
                     storm::modelchecker::helper::internal::LraViHelper<ValueType, storm::storage::StronglyConnectedComponent, storm::modelchecker::helper::internal::LraViTransitionsType::DetTsNoIs> viHelper(bscc, this->_transitionMatrix, aperiodicFactor, this->_markovianStates, this->_exitRates);
-                    return viHelper.performValueIteration(env, stateRewardsGetter, actionRewardsGetter, this->_exitRates);
+                    return viHelper.performValueIteration(env, stateValueGetter, actionValueGetter, this->_exitRates);
                 } else {
                     // We assume a DTMC (with deterministic timed states and no instant states)
                     storm::modelchecker::helper::internal::LraViHelper<ValueType, storm::storage::StronglyConnectedComponent, storm::modelchecker::helper::internal::LraViTransitionsType::DetTsNoIs> viHelper(bscc, this->_transitionMatrix, aperiodicFactor);
-                    return viHelper.performValueIteration(env, stateRewardsGetter, actionRewardsGetter);
+                    return viHelper.performValueIteration(env, stateValueGetter, actionValueGetter);
                 }
             }
             
+            template <typename ValueType>
+            std::pair<ValueType, std::vector<ValueType>> SparseDeterministicInfiniteHorizonHelper<ValueType>::computeLraForBsccGainBias(Environment const& env, ValueGetter const& stateValuesGetter, ValueGetter const& actionValuesGetter, storm::storage::StronglyConnectedComponent const& bscc) {
+                // We build the equation system as in Line 3 of Algorithm 3 from
+                // Kretinsky, Meggendorfer: Efficient Strategy Iteration for Mean Payoff in Markov Decision Processes (ATVA 2017)
+                // https://doi.org/10.1007/978-3-319-68167-2_25
+                // The first variable corresponds to the gain of the bscc whereas the subsequent variables yield the bias for each state s_1, s_2, ....
+                // No bias variable for s_0 is needed since it is always set to zero, yielding an nxn equation system matrix
+                // To make this work for CTMC, we could uniformize the model. This preserves LRA and ensures that we can compute the
+                // LRA as for a DTMC (the soujourn time in each state is the same). If we then multiply the equations with the uniformization rate,
+                // the uniformization rate cancels out. Hence, we obtain the equation system below.
+                
+                // Get a mapping from global state indices to local ones.
+                std::unordered_map<uint64_t, uint64_t> toLocalIndexMap;
+                uint64_t localIndex = 0;
+                for (auto const& globalIndex : bscc) {
+                    toLocalIndexMap[globalIndex] = localIndex;
+                    ++localIndex;
+                }
+                
+                // Prepare an environment for the underlying equation solver
+                auto subEnv = env;
+                if (subEnv.solver().getLinearEquationSolverType() == storm::solver::EquationSolverType::Topological) {
+                    // Topological solver does not make any sense since the BSCC is connected.
+                    subEnv.solver().setLinearEquationSolverType(subEnv.solver().topological().getUnderlyingEquationSolverType(), subEnv.solver().topological().isUnderlyingEquationSolverTypeSetFromDefault());
+                }
+                subEnv.solver().setLinearEquationSolverPrecision(env.solver().lra().getPrecision(), env.solver().lra().getRelativeTerminationCriterion());
+                
+                // Build the equation system matrix and vector.
+                storm::solver::GeneralLinearEquationSolverFactory<ValueType> linearEquationSolverFactory;
+                bool isEquationSystemFormat = linearEquationSolverFactory.getEquationProblemFormat(subEnv) == storm::solver::LinearEquationSolverProblemFormat::EquationSystem;
+                storm::storage::SparseMatrixBuilder<ValueType> builder(bscc.size(), bscc.size());
+                std::vector<ValueType> eqSysVector;
+                eqSysVector.reserve(bscc.size());
+                // The first row asserts that the weighted bias variables and the reward at s_0 sum up to the gain
+                uint64_t row = 0;
+                ValueType entryValue;
+                for (auto const& globalState : bscc) {
+                    ValueType rateAtState = this->_exitRates ? (*this->_exitRates)[globalState] : storm::utility::one<ValueType>();
+                    // Coefficient for the gain variable
+                    if (isEquationSystemFormat) {
+                        // '1-0' in row 0 and -(-1) in other rows
+                        builder.addNextValue(row, 0, storm::utility::one<ValueType>());
+                    } else if (row > 0) {
+                        // No coeficient in row 0, othwerise substract the gain
+                        builder.addNextValue(row, 0, -storm::utility::one<ValueType>());
+                    }
+                    // Compute weighted sum over successor state. As this is a BSCC, each successor state will again be in the BSCC.
+                    auto diagonalValue = storm::utility::zero<ValueType>();
+                    if (row > 0) {
+                        if (isEquationSystemFormat) {
+                            diagonalValue = rateAtState;
+                        } else {
+                            diagonalValue = storm::utility::one<ValueType>() - rateAtState;
+                        }
+                    }
+                    bool needDiagonalEntry = !storm::utility::isZero(diagonalValue);
+                    for (auto const& entry : this->_transitionMatrix.getRow(globalState)) {
+                        uint64_t col = toLocalIndexMap[entry.getColumn()];
+                        if (col == 0) {
+                            //Skip transition to state_0. This corresponds to setting the bias of state_0 to zero
+                            continue;
+                        }
+                        entryValue = entry.getValue() * rateAtState;
+                        if (isEquationSystemFormat) {
+                            entryValue = -entryValue;
+                        }
+                        if (needDiagonalEntry && col >= row) {
+                            if (col == row) {
+                                entryValue += diagonalValue;
+                            } else { // col > row
+                                builder.addNextValue(row, row, diagonalValue);
+                            }
+                            needDiagonalEntry = false;
+                        }
+                        builder.addNextValue(row, col, entryValue);
+                    }
+                    if (needDiagonalEntry) {
+                        builder.addNextValue(row, row, diagonalValue);
+                    }
+
+                    eqSysVector.push_back(stateValuesGetter(globalState) + rateAtState * actionValuesGetter(globalState));
+                    ++row;
+                }
+
+                // Create a linear equation solver
+                auto solver = linearEquationSolverFactory.create(subEnv, builder.build());
+                // Check solver requirements.
+                auto requirements = solver->getRequirements(subEnv);
+                STORM_LOG_THROW(!requirements.hasEnabledCriticalRequirement(), storm::exceptions::UnmetRequirementException, "Solver requirements " + requirements.getEnabledRequirementsAsString() + " not checked.");
+                // Todo: Find bounds on the bias variables. Just inserting the maximal value from the vector probably does not work.
+                
+                std::vector<ValueType> eqSysSol(bscc.size(), storm::utility::zero<ValueType>());
+                // Take the mean of the rewards as an initial guess for the gain
+                //eqSysSol.front() = std::accumulate(eqSysVector.begin(), eqSysVector.end(), storm::utility::zero<ValueType>()) / storm::utility::convertNumber<ValueType, uint64_t>(bscc.size());
+                solver->solveEquations(subEnv, eqSysSol, eqSysVector);
+                
+                ValueType gain = eqSysSol.front();
+                // insert bias value for state 0
+                eqSysSol.front() = storm::utility::zero<ValueType>();
+                // Return the gain and the bias values
+                return std::pair<ValueType, std::vector<ValueType>>(std::move(gain), std::move(eqSysSol));
+            }
+            
+            template <typename ValueType>
+            std::pair<ValueType, std::vector<ValueType>> SparseDeterministicInfiniteHorizonHelper<ValueType>::computeLraForBsccLraDistr(Environment const& env, ValueGetter const& stateValuesGetter, ValueGetter const& actionValuesGetter, storm::storage::StronglyConnectedComponent const& bscc) {
+
+                // Let A be ab auxiliary Matrix with A[s,s] =  R(s,s) - r(s) & A[s,s'] = R(s,s') for s,s' in BSCC and s!=s'.
+                // We build and solve the equation system for
+                // x*A=0 &  x_0+...+x_n=1  <=>  A^t*x=0=x-x & x_0+...+x_n=1  <=> (1+A^t)*x = x & 1-x_0-...-x_n-1=x_n
+                // Then, x[i] will be the fraction of the time we are in state i.
+                
+                // This method assumes that this BSCC consist of more than one state
+                if (bscc.size() == 1) {
+                    ValueType lraValue = stateValuesGetter(*bscc.begin()) + (this->isContinuousTime() ? (*this->_exitRates)[*bscc.begin()] * actionValuesGetter(*bscc.begin()) : actionValuesGetter(*bscc.begin()));
+                    return { lraValue, {storm::utility::one<ValueType>()} };
+                }
+                
+                // Prepare an environment for the underlying linear equation solver
+                auto subEnv = env;
+                if (subEnv.solver().getLinearEquationSolverType() == storm::solver::EquationSolverType::Topological) {
+                    // Topological solver does not make any sense since the BSCC is connected.
+                    subEnv.solver().setLinearEquationSolverType(subEnv.solver().topological().getUnderlyingEquationSolverType(), subEnv.solver().topological().isUnderlyingEquationSolverTypeSetFromDefault());
+                }
+                subEnv.solver().setLinearEquationSolverPrecision(env.solver().lra().getPrecision(), env.solver().lra().getRelativeTerminationCriterion());
+                
+                // Get a mapping from global state indices to local ones as well as a bitvector containing states within the BSCC.
+                std::unordered_map<uint64_t, uint64_t> toLocalIndexMap;
+                storm::storage::BitVector bsccStates(this->_transitionMatrix.getRowCount(), false);
+                uint64_t localIndex = 0;
+                for (auto const& globalIndex : bscc) {
+                    bsccStates.set(globalIndex, true);
+                    toLocalIndexMap[globalIndex] = localIndex;
+                    ++localIndex;
+                }
+                
+                // Build the auxiliary Matrix A.
+                auto auxMatrix = this->_transitionMatrix.getSubmatrix(false, bsccStates, bsccStates, true); // add diagonal entries!
+                uint64_t row = 0;
+                for (auto const& globalIndex : bscc) {
+                    ValueType rateAtState = this->_exitRates ? (*this->_exitRates)[globalIndex] : storm::utility::one<ValueType>();
+                    for (auto& entry : auxMatrix.getRow(row)) {
+                        if (entry.getColumn() == row) {
+                            // This value is non-zero since we have a BSCC with more than one state
+                            entry.setValue(rateAtState * (entry.getValue() - storm::utility::one<ValueType>()));
+                        } else if (this->isContinuousTime()) {
+                            entry.setValue(entry.getValue() * rateAtState);
+                        }
+                    }
+                    ++row;
+                }
+                assert(row == auxMatrix.getRowCount());
+                
+                // We need to consider A^t. This will not delete diagonal entries since they are non-zero.
+                auxMatrix = auxMatrix.transpose();
+                
+                // Check whether we need the fixpoint characterization
+                storm::solver::GeneralLinearEquationSolverFactory<ValueType> linearEquationSolverFactory;
+                bool isFixpointFormat = linearEquationSolverFactory.getEquationProblemFormat(subEnv) == storm::solver::LinearEquationSolverProblemFormat::FixedPointSystem;
+                if (isFixpointFormat) {
+                    // Add a 1 on the diagonal
+                    for (row = 0; row < auxMatrix.getRowCount(); ++row) {
+                        for (auto& entry : auxMatrix.getRow(row)) {
+                            if (entry.getColumn() == row) {
+                                entry.setValue(storm::utility::one<ValueType>() + entry.getValue());
+                            }
+                        }
+                    }
+                }
+                
+                // We now build the equation system matrix.
+                // We can drop the last row of A and add ones in this row instead to assert that the variables sum up to one
+                // Phase 1: replace the existing entries of the last row with ones
+                uint64_t col = 0;
+                uint64_t lastRow = auxMatrix.getRowCount() - 1;
+                for (auto& entry : auxMatrix.getRow(lastRow)) {
+                    entry.setColumn(col);
+                    if (isFixpointFormat) {
+                        if (col == lastRow) {
+                            entry.setValue(storm::utility::zero<ValueType>());
+                        } else {
+                            entry.setValue(-storm::utility::one<ValueType>());
+                        }
+                    } else {
+                        entry.setValue(storm::utility::one<ValueType>());
+                    }
+                    ++col;
+                }
+                storm::storage::SparseMatrixBuilder<ValueType> builder(std::move(auxMatrix));
+                for (; col <= lastRow; ++col) {
+                    if (isFixpointFormat) {
+                        if (col != lastRow) {
+                            builder.addNextValue(lastRow, col, -storm::utility::one<ValueType>());
+                        }
+                    } else {
+                        builder.addNextValue(lastRow, col, storm::utility::one<ValueType>());
+                    }
+                }
+                
+                std::vector<ValueType> bsccEquationSystemRightSide(bscc.size(), storm::utility::zero<ValueType>());
+                bsccEquationSystemRightSide.back() = storm::utility::one<ValueType>();
+                
+                // Create a linear equation solver
+                auto solver = linearEquationSolverFactory.create(subEnv,  builder.build());
+                solver->setBounds(storm::utility::zero<ValueType>(), storm::utility::one<ValueType>());
+                // Check solver requirements.
+                auto requirements = solver->getRequirements(subEnv);
+                requirements.clearLowerBounds();
+                requirements.clearUpperBounds();
+                STORM_LOG_THROW(!requirements.hasEnabledCriticalRequirement(), storm::exceptions::UnmetRequirementException, "Solver requirements " + requirements.getEnabledRequirementsAsString() + " not checked.");
+                
+                std::vector<ValueType> lraDistr(bscc.size(), storm::utility::one<ValueType>() / storm::utility::convertNumber<ValueType, uint64_t>(bscc.size()));
+                solver->solveEquations(subEnv, lraDistr, bsccEquationSystemRightSide);
+                
+                // Calculate final LRA Value
+                ValueType result = storm::utility::zero<ValueType>();
+                auto solIt = lraDistr.begin();
+                for (auto const& globalState : bscc) {
+                    if (this->isContinuousTime()) {
+                        result += (*solIt) * (stateValuesGetter(globalState) + (*this->_exitRates)[globalState] * actionValuesGetter(globalState));
+                    } else {
+                        result += (*solIt) * (stateValuesGetter(globalState) + actionValuesGetter(globalState));
+                    }
+                    ++solIt;
+                }
+                assert(solIt == lraDistr.end());
+
+                return std::pair<ValueType, std::vector<ValueType>>(std::move(result), std::move(lraDistr));
+            }
+
             template <typename ValueType>
             std::pair<storm::storage::SparseMatrix<ValueType>, std::vector<ValueType>> SparseDeterministicInfiniteHorizonHelper<ValueType>::buildSspMatrixVector(std::vector<ValueType> const& bsccLraValues, std::vector<uint64_t> const& inputStateToBsccIndexMap, storm::storage::BitVector const& statesNotInComponent, bool asEquationSystem) {
                 
@@ -203,6 +438,7 @@ namespace storm {
             
             template class SparseDeterministicInfiniteHorizonHelper<double>;
             template class SparseDeterministicInfiniteHorizonHelper<storm::RationalNumber>;
+            template class SparseDeterministicInfiniteHorizonHelper<storm::RationalFunction>;
             
         }
     }
diff --git a/src/storm/modelchecker/helper/infinitehorizon/SparseDeterministicInfiniteHorizonHelper.h b/src/storm/modelchecker/helper/infinitehorizon/SparseDeterministicInfiniteHorizonHelper.h
index 5c22cc1af..b5a78fad1 100644
--- a/src/storm/modelchecker/helper/infinitehorizon/SparseDeterministicInfiniteHorizonHelper.h
+++ b/src/storm/modelchecker/helper/infinitehorizon/SparseDeterministicInfiniteHorizonHelper.h
@@ -46,10 +46,20 @@ namespace storm {
                 std::pair<bool, ValueType> computeLraForTrivialBscc(Environment const& env, ValueGetter const& stateValuesGetter,  ValueGetter const& actionValuesGetter, storm::storage::StronglyConnectedComponent const& bscc);
                 
                 /*!
-                 * As computeLraForMec but uses value iteration as a solution method (independent of what is set in env)
+                 * As computeLraForComponent but uses value iteration as a solution method (independent of what is set in env)
                  */
                 ValueType computeLraForBsccVi(Environment const& env, ValueGetter const& stateValuesGetter, ValueGetter const& actionValuesGetter, storm::storage::StronglyConnectedComponent const& bscc);
                 
+                /*!
+                 * As computeLraForComponent but solves a linear equation system encoding gain and bias (independent of what is set in env)
+                 * @see Kretinsky, Meggendorfer: Efficient Strategy Iteration for Mean Payoff in Markov Decision Processes (ATVA 2017), https://doi.org/10.1007/978-3-319-68167-2_25
+                 */
+                std::pair<ValueType, std::vector<ValueType>> computeLraForBsccGainBias(Environment const& env, ValueGetter const& stateValuesGetter, ValueGetter const& actionValuesGetter, storm::storage::StronglyConnectedComponent const& bscc);
+                /*!
+                 * As computeLraForComponent but solves a linear equation system consisting encoding the long run average (steady state) distribution (independent of what is set in env)
+                 */
+                std::pair<ValueType, std::vector<ValueType>> computeLraForBsccLraDistr(Environment const& env, ValueGetter const& stateValuesGetter, ValueGetter const& actionValuesGetter, storm::storage::StronglyConnectedComponent const& bscc);
+  
                 std::pair<storm::storage::SparseMatrix<ValueType>, std::vector<ValueType>> buildSspMatrixVector(std::vector<ValueType> const& bsccLraValues, std::vector<uint64_t> const& inputStateToBsccIndexMap, storm::storage::BitVector const& statesNotInComponent, bool asEquationSystem);
                 
                 /*!