From 9421a60d5a7a6fb822ba201af6f97f293ca7b211 Mon Sep 17 00:00:00 2001
From: Tim Quatmann <tim.quatmann@cs.rwth-aachen.de>
Date: Thu, 24 Sep 2020 14:23:09 +0200
Subject: [PATCH] Fixes for multi-obj reward analysis.

---
 .../constraintbased/SparseCbQuery.cpp         |  6 +-
 .../pcaa/StandardPcaaWeightVectorChecker.cpp  | 12 ++--
 .../SparseMultiObjectiveRewardAnalysis.cpp    | 63 +++++++++----------
 .../SparseMultiObjectiveRewardAnalysis.h      |  6 +-
 4 files changed, 40 insertions(+), 47 deletions(-)
diff --git a/src/storm/modelchecker/multiobjective/constraintbased/SparseCbQuery.cpp b/src/storm/modelchecker/multiobjective/constraintbased/SparseCbQuery.cpp
index 7d2d680a8..30d808782 100644
--- a/src/storm/modelchecker/multiobjective/constraintbased/SparseCbQuery.cpp
+++ b/src/storm/modelchecker/multiobjective/constraintbased/SparseCbQuery.cpp
@@ -27,13 +27,13 @@ namespace storm {
                 auto rewardAnalysis = preprocessing::SparseMultiObjectiveRewardAnalysis<SparseModelType>::analyze(preprocessorResult);
                 STORM_LOG_THROW(rewardAnalysis.rewardFinitenessType != preprocessing::RewardFinitenessType::Infinite, storm::exceptions::NotSupportedException, "TThere is no Pareto optimal scheduler that yields finite reward for all objectives. This is not supported.");
                 
-                STORM_LOG_THROW(rewardAnalysis.rewardLessInfinityEStates, storm::exceptions::UnexpectedException, "The set of states with reward < infinity for some scheduler has not been computed during preprocessing.");
+                STORM_LOG_THROW(rewardAnalysis.totalRewardLessInfinityEStates, storm::exceptions::UnexpectedException, "The set of states with reward < infinity for some scheduler has not been computed during preprocessing.");
                 STORM_LOG_THROW(preprocessorResult.containsOnlyTrivialObjectives(), storm::exceptions::NotSupportedException, "At least one objective was not reduced to an expected (total or cumulative) reward objective during preprocessing. This is not supported by the considered weight vector checker.");
                 STORM_LOG_THROW(preprocessorResult.preprocessedModel->getInitialStates().getNumberOfSetBits() == 1, storm::exceptions::NotSupportedException, "The model has multiple initial states.");
                 
                 // Build a subsystem of the preprocessor result model that discards states that yield infinite reward for all schedulers.
                 // We can also merge the states that will have reward zero anyway.
-                storm::storage::BitVector maybeStates = rewardAnalysis.rewardLessInfinityEStates.get() & ~rewardAnalysis.reward0AStates;
+                storm::storage::BitVector maybeStates = rewardAnalysis.totalRewardLessInfinityEStates.get() & ~rewardAnalysis.reward0AStates;
                 std::set<std::string> relevantRewardModels;
                 for (auto const& obj : this->objectives) {
                     obj.formula->gatherReferencedRewardModels(relevantRewardModels);
@@ -42,7 +42,7 @@ namespace storm {
                 auto mergerResult = merger.mergeTargetAndSinkStates(maybeStates, rewardAnalysis.reward0AStates, storm::storage::BitVector(maybeStates.size(), false), std::vector<std::string>(relevantRewardModels.begin(), relevantRewardModels.end()));
                 
                 preprocessedModel = mergerResult.model;
-                reward0EStates = rewardAnalysis.reward0EStates % maybeStates;
+                reward0EStates = rewardAnalysis.totalReward0EStates % maybeStates;
                 if (mergerResult.targetState) {
                     // There is an additional state in the result
                     reward0EStates.resize(reward0EStates.size() + 1, true);
diff --git a/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp b/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp
index 90b78cf2a..cc50d3dc3 100644
--- a/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp
+++ b/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp
@@ -39,14 +39,14 @@ namespace storm {
             void StandardPcaaWeightVectorChecker<SparseModelType>::initialize(preprocessing::SparseMultiObjectivePreprocessorResult<SparseModelType> const& preprocessorResult) {
                 auto rewardAnalysis = preprocessing::SparseMultiObjectiveRewardAnalysis<SparseModelType>::analyze(preprocessorResult);
                 STORM_LOG_THROW(rewardAnalysis.rewardFinitenessType != preprocessing::RewardFinitenessType::Infinite, storm::exceptions::NotSupportedException, "There is no Pareto optimal scheduler that yields finite reward for all objectives. This is not supported.");
-                STORM_LOG_THROW(rewardAnalysis.rewardLessInfinityEStates, storm::exceptions::UnexpectedException, "The set of states with reward < infinity for some scheduler has not been computed during preprocessing.");
+                STORM_LOG_THROW(rewardAnalysis.totalRewardLessInfinityEStates, storm::exceptions::UnexpectedException, "The set of states with reward < infinity for some scheduler has not been computed during preprocessing.");
                 STORM_LOG_THROW(preprocessorResult.containsOnlyTrivialObjectives(), storm::exceptions::NotSupportedException, "At least one objective was not reduced to an expected (total or cumulative) reward objective during preprocessing. This is not supported by the considered weight vector checker.");
                 STORM_LOG_THROW(preprocessorResult.preprocessedModel->getInitialStates().getNumberOfSetBits() == 1, storm::exceptions::NotSupportedException, "The model has multiple initial states.");
 
                 // Build a subsystem of the preprocessor result model that discards states that yield infinite reward for all schedulers.
                 // We can also merge the states that will have reward zero anyway.
-                storm::storage::BitVector maybeStates = rewardAnalysis.rewardLessInfinityEStates.get() & ~rewardAnalysis.reward0AStates;
-                storm::storage::BitVector finiteRewardChoices = preprocessorResult.preprocessedModel->getTransitionMatrix().getRowFilter(rewardAnalysis.rewardLessInfinityEStates.get(), rewardAnalysis.rewardLessInfinityEStates.get());
+                storm::storage::BitVector maybeStates = rewardAnalysis.totalRewardLessInfinityEStates.get() & ~rewardAnalysis.reward0AStates;
+                storm::storage::BitVector finiteRewardChoices = preprocessorResult.preprocessedModel->getTransitionMatrix().getRowFilter(rewardAnalysis.totalRewardLessInfinityEStates.get(), rewardAnalysis.totalRewardLessInfinityEStates.get());
                 std::set<std::string> relevantRewardModels;
                 for (auto const& obj : this->objectives) {
                     obj.formula->gatherReferencedRewardModels(relevantRewardModels);
@@ -60,7 +60,7 @@ namespace storm {
                 // Initilize general data of the model
                 transitionMatrix = std::move(mergerResult.model->getTransitionMatrix());
                 initialState = *mergerResult.model->getInitialStates().begin();
-                reward0EStates = rewardAnalysis.reward0EStates % maybeStates;
+                reward0EStates = rewardAnalysis.totalReward0EStates % maybeStates;
                 if (mergerResult.targetState) {
                     // There is an additional state in the result
                     reward0EStates.resize(reward0EStates.size() + 1, true);
@@ -237,7 +237,7 @@ namespace storm {
                         }
                     }
                 }
-                
+
                 // Finally, we need to take care of states that will reach a bad state with prob greater 0 (including the bad states themselves).
                 // due to the precondition, we know that it has to be possible to eventually avoid the bad states for ever.
                 // Perform a backwards search from the avoid states and store choices with prob. 1
@@ -279,7 +279,6 @@ namespace storm {
             
             template <class SparseModelType>
             void StandardPcaaWeightVectorChecker<SparseModelType>::unboundedWeightedPhase(Environment const& env, std::vector<ValueType> const& weightedRewardVector, std::vector<ValueType> const& weightVector) {
-                
                 if (this->objectivesWithNoUpperTimeBound.empty() || !storm::utility::vector::hasNonZeroEntry(weightedRewardVector)) {
                     this->weightedResult = std::vector<ValueType>(transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());
                     // Get an arbitrary scheduler that yields finite reward for all objectives
@@ -566,6 +565,7 @@ namespace storm {
                         originalSolution[state] = reducedSolution[stateInReducedModel];
                         uint_fast64_t chosenRowInReducedModel = reducedMatrix.getRowGroupIndices()[stateInReducedModel] + reducedOptimalChoices[stateInReducedModel];
                         uint_fast64_t chosenRowInOriginalModel = reducedToOriginalChoiceMapping[chosenRowInReducedModel];
+                        
                         // Check if the state is a bottom state, i.e., the chosen row stays inside its EC.
                         bool stateIsBottom = reward0EStates.get(state);
                         for (auto const& entry : transitionMatrix.getRow(chosenRowInOriginalModel)) {
diff --git a/src/storm/modelchecker/multiobjective/preprocessing/SparseMultiObjectiveRewardAnalysis.cpp b/src/storm/modelchecker/multiobjective/preprocessing/SparseMultiObjectiveRewardAnalysis.cpp
index 2e88fda25..7ae3e3d73 100644
--- a/src/storm/modelchecker/multiobjective/preprocessing/SparseMultiObjectiveRewardAnalysis.cpp
+++ b/src/storm/modelchecker/multiobjective/preprocessing/SparseMultiObjectiveRewardAnalysis.cpp
@@ -48,46 +48,39 @@ namespace storm {
                     std::vector<uint_fast64_t> const& groupIndices = transitions.getRowGroupIndices();
                     storm::storage::BitVector allStates(stateCount, true);
     
-                    // Get the choices that yield non-zero reward
-                    storm::storage::BitVector zeroRewardChoices(preprocessorResult.preprocessedModel->getNumberOfChoices(), true);
+                    // Get the choices without any reward for the various objective types
+                    storm::storage::BitVector zeroLraRewardChoices(preprocessorResult.preprocessedModel->getNumberOfChoices(), true);
+                    storm::storage::BitVector zeroTotalRewardChoices(preprocessorResult.preprocessedModel->getNumberOfChoices(), true);
+                    storm::storage::BitVector zeroCumulativeRewardChoices(preprocessorResult.preprocessedModel->getNumberOfChoices(), true);
                     for (auto const& obj : preprocessorResult.objectives) {
                         if (obj.formula->isRewardOperatorFormula()) {
-                            if (obj.formula->getSubformula().isTotalRewardFormula() || obj.formula->getSubformula().isCumulativeRewardFormula()) {
-                                auto const& rewModel = preprocessorResult.preprocessedModel->getRewardModel(obj.formula->asRewardOperatorFormula().getRewardModelName());
-                                zeroRewardChoices &= rewModel.getChoicesWithZeroReward(transitions);
+                            auto const& rewModel = preprocessorResult.preprocessedModel->getRewardModel(obj.formula->asRewardOperatorFormula().getRewardModelName());
+                            if (obj.formula->getSubformula().isLongRunAverageRewardFormula()) {
+                                zeroLraRewardChoices &= rewModel.getChoicesWithZeroReward(transitions);
+                            } else if (obj.formula->getSubformula().isTotalRewardFormula()) {
+                                zeroTotalRewardChoices &= rewModel.getChoicesWithZeroReward(transitions);
                             } else {
-                                STORM_LOG_WARN_COND(obj.formula->getSubformula().isLongRunAverageRewardFormula(), "Analyzing subformula " << obj.formula->getSubformula() << " is not supported properly.");
+                                STORM_LOG_WARN_COND(obj.formula->getSubformula().isCumulativeRewardFormula(), "Analyzing subformula " << obj.formula->getSubformula() << " is not supported properly.");
+                                zeroCumulativeRewardChoices &= rewModel.getChoicesWithZeroReward(transitions);
                             }
                         }
                     }
                     
-                    // Get the states that have reward for at least one (or for all) choices assigned to it.
-                    storm::storage::BitVector statesWithRewardForOneChoice = storm::storage::BitVector(stateCount, false);
-                    storm::storage::BitVector statesWithRewardForAllChoices = storm::storage::BitVector(stateCount, true);
-                    for (uint_fast64_t state = 0; state < stateCount; ++state) {
-                        bool stateHasChoiceWithReward = false;
-                        bool stateHasChoiceWithoutReward = false;
-                        uint_fast64_t const& groupEnd = groupIndices[state + 1];
-                        for (uint_fast64_t choice = groupIndices[state]; choice < groupEnd; ++choice) {
-                            if (zeroRewardChoices.get(choice)) {
-                                stateHasChoiceWithoutReward = true;
-                            } else {
-                                stateHasChoiceWithReward = true;
-                            }
-                        }
-                        if (stateHasChoiceWithReward) {
-                            statesWithRewardForOneChoice.set(state, true);
-                        }
-                        if (stateHasChoiceWithoutReward) {
-                            statesWithRewardForAllChoices.set(state, false);
-                        }
-                    }
+                    // get the states for which there is a scheduler yielding total reward zero
+                    auto statesWithTotalRewardForAllChoices = transitions.getRowGroupFilter(~zeroTotalRewardChoices, true);
+                    result.totalReward0EStates = storm::utility::graph::performProbGreater0A(transitions, groupIndices, backwardTransitions, allStates, statesWithTotalRewardForAllChoices, false, 0, zeroTotalRewardChoices);
+                    result.totalReward0EStates.complement();
                     
-                    // get the states for which there is a scheduler yielding reward zero
-                    result.reward0EStates = storm::utility::graph::performProbGreater0A(transitions, groupIndices, backwardTransitions, allStates, statesWithRewardForAllChoices, false, 0, zeroRewardChoices);
-                    result.reward0EStates.complement();
-                    result.reward0AStates = storm::utility::graph::performProb0A(backwardTransitions, allStates, statesWithRewardForOneChoice);
-                    assert(result.reward0AStates.isSubsetOf(result.reward0EStates));
+                    // Get the states for which all schedulers yield a reward of 0
+                    // Starting with LRA objectives
+                    auto statesWithoutLraReward = transitions.getRowGroupFilter(zeroLraRewardChoices, true);
+                    // Compute Sat(Forall F (Forall G "LRAStatesWithoutReward"))
+                    auto forallGloballyStatesWithoutLraReward = storm::utility::graph::performProb0A(backwardTransitions, statesWithoutLraReward, ~statesWithoutLraReward);
+                    result.reward0AStates = storm::utility::graph::performProb1A(transitions, groupIndices, backwardTransitions, allStates, forallGloballyStatesWithoutLraReward);
+                    // Now also incorporate cumulative and total reward objectives
+                    auto statesWithTotalOrCumulativeReward = transitions.getRowGroupFilter(~(zeroTotalRewardChoices & zeroCumulativeRewardChoices), false);
+                    result.reward0AStates &= storm::utility::graph::performProb0A(backwardTransitions, allStates, statesWithTotalOrCumulativeReward);
+                    assert(result.reward0AStates.isSubsetOf(result.totalReward0EStates));
                 }
              
                 template<typename SparseModelType>
@@ -129,8 +122,8 @@ namespace storm {
                             result.rewardFinitenessType = RewardFinitenessType::Infinite;
                         } else {
                             // Check whether there is a scheduler under which all rewards are finite.
-                            result.rewardLessInfinityEStates = storm::utility::graph::performProb1E(transitions, groupIndices, backwardTransitions, allStates, result.reward0EStates);
-                            if ((result.rewardLessInfinityEStates.get() & preprocessorResult.preprocessedModel->getInitialStates()).empty()) {
+                            result.totalRewardLessInfinityEStates = storm::utility::graph::performProb1E(transitions, groupIndices, backwardTransitions, allStates, result.totalReward0EStates);
+                            if ((result.totalRewardLessInfinityEStates.get() & preprocessorResult.preprocessedModel->getInitialStates()).empty()) {
                                 // There is no scheduler that induces finite reward for the initial state
                                 result.rewardFinitenessType = RewardFinitenessType::Infinite;
                             } else {
@@ -138,7 +131,7 @@ namespace storm {
                             }
                         }
                     } else {
-                        result.rewardLessInfinityEStates = allStates;
+                        result.totalRewardLessInfinityEStates = allStates;
                     }
                 }
             
diff --git a/src/storm/modelchecker/multiobjective/preprocessing/SparseMultiObjectiveRewardAnalysis.h b/src/storm/modelchecker/multiobjective/preprocessing/SparseMultiObjectiveRewardAnalysis.h
index 73dc0b3b3..ed58e8a0e 100644
--- a/src/storm/modelchecker/multiobjective/preprocessing/SparseMultiObjectiveRewardAnalysis.h
+++ b/src/storm/modelchecker/multiobjective/preprocessing/SparseMultiObjectiveRewardAnalysis.h
@@ -34,9 +34,9 @@ namespace storm {
                         RewardFinitenessType rewardFinitenessType;
                 
                         // The states of the preprocessed model for which...
-                        storm::storage::BitVector reward0EStates; // ... there is a scheduler such that all expected reward objectives have value zero
-                        storm::storage::BitVector reward0AStates; // ... all schedulers induce value 0 for all expected reward objectives
-                        boost::optional<storm::storage::BitVector> rewardLessInfinityEStates; // ... there is a scheduler yielding finite reward for all expected reward objectives
+                        storm::storage::BitVector totalReward0EStates; // ... there is a scheduler such that all expected total reward objectives have value zero
+                        storm::storage::BitVector reward0AStates; // ... all schedulers induce value 0 for all reward-based objectives
+                        boost::optional<storm::storage::BitVector> totalRewardLessInfinityEStates; // ... there is a scheduler yielding finite reward for all expected total reward objectives
                     };
 
                     /*!