From 29a7a7e865dc80a8a515743487a8ac5a91982e22 Mon Sep 17 00:00:00 2001
From: Tim Quatmann
Date: Thu, 24 Sep 2020 14:57:13 +0200
Subject: [PATCH] WeightVectorChecker: Making initialization LRA-ready

---
 .../pcaa/StandardMaPcaaWeightVectorChecker.cpp   |  8 +++++++-
 .../pcaa/StandardMdpPcaaWeightVectorChecker.cpp  |  2 +-
 .../pcaa/StandardPcaaWeightVectorChecker.cpp     | 15 ++++++++++-----
 .../pcaa/StandardPcaaWeightVectorChecker.h       | 11 ++++++++---
 4 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/storm/modelchecker/multiobjective/pcaa/StandardMaPcaaWeightVectorChecker.cpp b/src/storm/modelchecker/multiobjective/pcaa/StandardMaPcaaWeightVectorChecker.cpp
index 72052768a..e1bad3580 100644
--- a/src/storm/modelchecker/multiobjective/pcaa/StandardMaPcaaWeightVectorChecker.cpp
+++ b/src/storm/modelchecker/multiobjective/pcaa/StandardMaPcaaWeightVectorChecker.cpp
@@ -35,7 +35,8 @@ namespace storm {
                 exitRates = model.getExitRates();
 
                 // Set the (discretized) state action rewards.
-                this->actionRewards.resize(this->objectives.size());
+                this->actionRewards.assign(this->objectives.size(), {});
+                this->stateRewards.assign(this->objectives.size(), {});
                 for (uint64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
                     auto const& formula = *this->objectives[objIndex].formula;
                     STORM_LOG_THROW(formula.isRewardOperatorFormula() && formula.asRewardOperatorFormula().hasRewardModelName(), storm::exceptions::UnexpectedException, "Unexpected type of operator formula: " << formula);
@@ -49,6 +50,11 @@
                             this->actionRewards[objIndex][model.getTransitionMatrix().getRowGroupIndices()[markovianState]] += rewModel.getStateReward(markovianState) / exitRates[markovianState];
                         }
                     }
+                } else if (formula.getSubformula().isLongRunAverageRewardFormula()) {
+                    // The LRA methods for MA require keeping track of state- and action rewards separately
+                    if (rewModel.hasStateRewards()) {
+                        this->stateRewards[objIndex] = rewModel.getStateRewardVector();
+                    }
                 } else {
                     STORM_LOG_THROW(formula.getSubformula().isCumulativeRewardFormula() && formula.getSubformula().asCumulativeRewardFormula().getTimeBoundReference().isTimeBound(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
                     STORM_LOG_THROW(!rewModel.hasStateRewards(), storm::exceptions::InvalidPropertyException, "Found state rewards for time bounded objective " << this->objectives[objIndex].originalFormula << ". This is not supported.");
diff --git a/src/storm/modelchecker/multiobjective/pcaa/StandardMdpPcaaWeightVectorChecker.cpp b/src/storm/modelchecker/multiobjective/pcaa/StandardMdpPcaaWeightVectorChecker.cpp
index 96a7e1de6..09abb8590 100644
--- a/src/storm/modelchecker/multiobjective/pcaa/StandardMdpPcaaWeightVectorChecker.cpp
+++ b/src/storm/modelchecker/multiobjective/pcaa/StandardMdpPcaaWeightVectorChecker.cpp
@@ -37,7 +37,7 @@ namespace storm {
                     auto const& cumulativeRewardFormula = formula.getSubformula().asCumulativeRewardFormula();
                     STORM_LOG_THROW(!cumulativeRewardFormula.isMultiDimensional() && !cumulativeRewardFormula.getTimeBoundReference().isRewardBound(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
                 } else {
-                    STORM_LOG_THROW(formula.getSubformula().isTotalRewardFormula(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
+                    STORM_LOG_THROW(formula.getSubformula().isTotalRewardFormula() || formula.getSubformula().isLongRunAverageRewardFormula(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
                 }
 
                 typename SparseMdpModelType::RewardModelType const& rewModel = model.getRewardModel(formula.asRewardOperatorFormula().getRewardModelName());
diff --git a/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp b/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp
index cc50d3dc3..39d21d845 100644
--- a/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp
+++ b/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp
@@ -40,19 +40,19 @@ namespace storm {
                 auto rewardAnalysis = preprocessing::SparseMultiObjectiveRewardAnalysis<SparseModelType>::analyze(preprocessorResult);
                 STORM_LOG_THROW(rewardAnalysis.rewardFinitenessType != preprocessing::RewardFinitenessType::Infinite, storm::exceptions::NotSupportedException, "There is no Pareto optimal scheduler that yields finite reward for all objectives. This is not supported.");
                 STORM_LOG_THROW(rewardAnalysis.totalRewardLessInfinityEStates, storm::exceptions::UnexpectedException, "The set of states with reward < infinity for some scheduler has not been computed during preprocessing.");
-                STORM_LOG_THROW(preprocessorResult.containsOnlyTrivialObjectives(), storm::exceptions::NotSupportedException, "At least one objective was not reduced to an expected (total or cumulative) reward objective during preprocessing. This is not supported by the considered weight vector checker.");
+                STORM_LOG_THROW(preprocessorResult.containsOnlyTrivialObjectives(), storm::exceptions::NotSupportedException, "At least one objective was not reduced to an expected (long run, total or cumulative) reward objective during preprocessing. This is not supported by the considered weight vector checker.");
                 STORM_LOG_THROW(preprocessorResult.preprocessedModel->getInitialStates().getNumberOfSetBits() == 1, storm::exceptions::NotSupportedException, "The model has multiple initial states.");
 
                 // Build a subsystem of the preprocessor result model that discards states that yield infinite reward for all schedulers.
                 // We can also merge the states that will have reward zero anyway.
                 storm::storage::BitVector maybeStates = rewardAnalysis.totalRewardLessInfinityEStates.get() & ~rewardAnalysis.reward0AStates;
-                storm::storage::BitVector finiteRewardChoices = preprocessorResult.preprocessedModel->getTransitionMatrix().getRowFilter(rewardAnalysis.totalRewardLessInfinityEStates.get(), rewardAnalysis.totalRewardLessInfinityEStates.get());
+                storm::storage::BitVector finiteTotalRewardChoices = preprocessorResult.preprocessedModel->getTransitionMatrix().getRowFilter(rewardAnalysis.totalRewardLessInfinityEStates.get(), rewardAnalysis.totalRewardLessInfinityEStates.get());
                 std::set<std::string> relevantRewardModels;
                 for (auto const& obj : this->objectives) {
                     obj.formula->gatherReferencedRewardModels(relevantRewardModels);
                 }
                 storm::transformer::GoalStateMerger<SparseModelType> merger(*preprocessorResult.preprocessedModel);
-                auto mergerResult = merger.mergeTargetAndSinkStates(maybeStates, rewardAnalysis.reward0AStates, storm::storage::BitVector(maybeStates.size(), false), std::vector<std::string>(relevantRewardModels.begin(), relevantRewardModels.end()), finiteRewardChoices);
+                auto mergerResult = merger.mergeTargetAndSinkStates(maybeStates, rewardAnalysis.reward0AStates, storm::storage::BitVector(maybeStates.size(), false), std::vector<std::string>(relevantRewardModels.begin(), relevantRewardModels.end()), finiteTotalRewardChoices);
 
                 // Initialize data specific for the considered model type
                 initializeModelTypeSpecificData(*mergerResult.model);
@@ -60,10 +60,10 @@
                 // Initilize general data of the model
                 transitionMatrix = std::move(mergerResult.model->getTransitionMatrix());
                 initialState = *mergerResult.model->getInitialStates().begin();
-                reward0EStates = rewardAnalysis.totalReward0EStates % maybeStates;
+                totalReward0EStates = rewardAnalysis.totalReward0EStates % maybeStates;
                 if (mergerResult.targetState) {
                     // There is an additional state in the result
-                    reward0EStates.resize(reward0EStates.size() + 1, true);
+                    totalReward0EStates.resize(totalReward0EStates.size() + 1, true);
 
                     // The overapproximation for the possible ec choices consists of the states that can reach the target states with prob. 0 and the target state itself.
                     storm::storage::BitVector targetStateAsVector(transitionMatrix.getRowGroupCount(), false);
@@ -75,6 +75,7 @@
                 }
 
                 // set data for unbounded objectives
+                lraObjectives = storm::storage::BitVector(this->objectives.size(), false);
                 objectivesWithNoUpperTimeBound = storm::storage::BitVector(this->objectives.size(), false);
                 actionsWithoutRewardInUnboundedPhase = storm::storage::BitVector(transitionMatrix.getRowCount(), true);
                 for (uint_fast64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
@@ -83,6 +84,10 @@
                         objectivesWithNoUpperTimeBound.set(objIndex, true);
                         actionsWithoutRewardInUnboundedPhase &= storm::utility::vector::filterZero(actionRewards[objIndex]);
                     }
+                    if (formula.getSubformula().isLongRunAverageRewardFormula()) {
+                        lraObjectives.set(objIndex, true);
+                        objectivesWithNoUpperTimeBound.set(objIndex, true);
+                    }
                 }
 
                 // initialize data for the results
diff --git a/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h b/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h
index 3a6ac3c9f..a9fd3a014 100644
--- a/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h
+++ b/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h
@@ -117,11 +117,16 @@ namespace storm {
             storm::storage::BitVector ecChoicesHint;
             // The actions that have reward assigned for at least one objective without upper timeBound
             storm::storage::BitVector actionsWithoutRewardInUnboundedPhase;
-            // The states for which there is a scheduler yielding reward 0 for each objective
-            storm::storage::BitVector reward0EStates;
+            // The states for which there is a scheduler yielding reward 0 for each total reward objective
+            storm::storage::BitVector totalReward0EStates;
 
             // stores the state action rewards for each objective.
             std::vector<std::vector<ValueType>> actionRewards;
+            // stores the state rewards for each objective.
+            // These are only relevant for LRA objectives for MAs (otherwise, they appear within the action rewards). For other objectives/models, the corresponding vector will be empty.
+            std::vector<std::vector<ValueType>> stateRewards;
+            // stores the indices of the objectives for which we need to compute the long run average values
+            storm::storage::BitVector lraObjectives;
             // stores the indices of the objectives for which there is no upper time bound
             storm::storage::BitVector objectivesWithNoUpperTimeBound;
 
@@ -137,7 +142,7 @@
            std::vector<ValueType> offsetsToUnderApproximation;
            std::vector<ValueType> offsetsToOverApproximation;
            // The scheduler choices that optimize the weighted rewards of undounded objectives.
-           std::vector<uint64_t> optimalChoices;
+           std::vector<uint64_t> optimalChoices;
 
            struct EcQuotient {
                storm::storage::SparseMatrix<ValueType> matrix;
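
The bookkeeping this patch introduces can be summarized independently of Storm's class hierarchy: LRA objectives are flagged in their own bit vector, additionally count as objectives without an upper time bound, and (for Markov automata) keep their state rewards separate from the action rewards. The following is a minimal self-contained sketch of that pattern; SubformulaKind, CheckerData, initializeFlags, and the std::vector<bool> stand-in for storm::storage::BitVector are illustrative assumptions, not Storm's API.

    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for the preprocessed objective formulas.
    enum class SubformulaKind { TotalReward, CumulativeReward, LongRunAverageReward };

    struct CheckerData {
        std::vector<bool> lraObjectives;                  // stands in for storm::storage::BitVector
        std::vector<bool> objectivesWithNoUpperTimeBound; // stands in for storm::storage::BitVector
        std::vector<std::vector<double>> actionRewards;   // one reward vector per objective
        std::vector<std::vector<double>> stateRewards;    // per objective; empty unless LRA on an MA
    };

    // Mirrors the flag initialization above: total reward and LRA objectives both
    // belong to the unbounded phase, but only LRA objectives are flagged for a
    // long-run average computation (and separate state-reward handling on MAs).
    inline CheckerData initializeFlags(std::vector<SubformulaKind> const& objectives, uint64_t numChoices) {
        CheckerData data;
        data.lraObjectives.assign(objectives.size(), false);
        data.objectivesWithNoUpperTimeBound.assign(objectives.size(), false);
        data.actionRewards.assign(objectives.size(), std::vector<double>(numChoices, 0.0));
        data.stateRewards.assign(objectives.size(), {});  // stays empty for non-LRA objectives
        for (uint64_t objIndex = 0; objIndex < objectives.size(); ++objIndex) {
            if (objectives[objIndex] == SubformulaKind::TotalReward) {
                data.objectivesWithNoUpperTimeBound[objIndex] = true;
            }
            if (objectives[objIndex] == SubformulaKind::LongRunAverageReward) {
                data.lraObjectives[objIndex] = true;
                data.objectivesWithNoUpperTimeBound[objIndex] = true;
            }
        }
        return data;
    }

Keeping lraObjectives separate from objectivesWithNoUpperTimeBound lets the weighted-sum phase treat all unbounded objectives uniformly, while LRA-specific steps (and the stateRewards vectors on Markov automata) are only touched where the corresponding bit is set.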