
WeightVectorChecker: Making initialization LRA-ready

Branch: tempestpy_adaptions
Tim Quatmann, 4 years ago
Parent commit: 29a7a7e865
Changed files:
  1. src/storm/modelchecker/multiobjective/pcaa/StandardMaPcaaWeightVectorChecker.cpp (8 changed lines)
  2. src/storm/modelchecker/multiobjective/pcaa/StandardMdpPcaaWeightVectorChecker.cpp (2 changed lines)
  3. src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp (15 changed lines)
  4. src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h (11 changed lines)

src/storm/modelchecker/multiobjective/pcaa/StandardMaPcaaWeightVectorChecker.cpp (8 changed lines)

@@ -35,7 +35,8 @@ namespace storm {
 exitRates = model.getExitRates();
 // Set the (discretized) state action rewards.
-this->actionRewards.resize(this->objectives.size());
+this->actionRewards.assign(this->objectives.size(), {});
+this->stateRewards.assign(this->objectives.size(), {});
 for (uint64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
 auto const& formula = *this->objectives[objIndex].formula;
 STORM_LOG_THROW(formula.isRewardOperatorFormula() && formula.asRewardOperatorFormula().hasRewardModelName(), storm::exceptions::UnexpectedException, "Unexpected type of operator formula: " << formula);
@@ -49,6 +50,11 @@ namespace storm {
 this->actionRewards[objIndex][model.getTransitionMatrix().getRowGroupIndices()[markovianState]] += rewModel.getStateReward(markovianState) / exitRates[markovianState];
 }
 }
+} else if (formula.getSubformula().isLongRunAverageRewardFormula()) {
+// The LRA methods for MA require keeping track of state- and action rewards separately
+if (rewModel.hasStateRewards()) {
+this->stateRewards[objIndex] = rewModel.getStateRewardVector();
+}
 } else {
 STORM_LOG_THROW(formula.getSubformula().isCumulativeRewardFormula() && formula.getSubformula().asCumulativeRewardFormula().getTimeBoundReference().isTimeBound(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
 STORM_LOG_THROW(!rewModel.hasStateRewards(), storm::exceptions::InvalidPropertyException, "Found state rewards for time bounded objective " << this->objectives[objIndex].originalFormula << ". This is not supported.");

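Note: for total- and cumulative-reward objectives on a Markov automaton, the state reward of a Markovian state is discretized into the action reward of that state's Markovian choice by dividing the reward rate by the exit rate (first hunk above). For a long-run average objective, the state-reward vector is kept separately instead, because the LRA computation has to weight state-reward rates with expected sojourn times. A minimal conceptual sketch of the two treatments; the struct and helper names are illustrative and not part of Storm's API:

```cpp
#include <cstdint>
#include <vector>

// Per-objective reward data, mirroring the idea of actionRewards / stateRewards above.
struct ObjectiveRewards {
    std::vector<double> actionRewards; // indexed by choice
    std::vector<double> stateRewards;  // indexed by state; only used for LRA objectives on MAs
};

// Hypothetical helper: account for the state-reward rate of a Markovian state.
void addMarkovianStateReward(ObjectiveRewards& rewards, uint64_t state, uint64_t markovianChoice,
                             double stateRewardRate, double exitRate, bool isLraObjective) {
    if (isLraObjective) {
        // Keep the rate; the LRA computation weights it with the expected sojourn time 1/exitRate.
        rewards.stateRewards[state] += stateRewardRate;
    } else {
        // Discretize: expected reward gathered during one sojourn in the Markovian state.
        rewards.actionRewards[markovianChoice] += stateRewardRate / exitRate;
    }
}
```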
src/storm/modelchecker/multiobjective/pcaa/StandardMdpPcaaWeightVectorChecker.cpp (2 changed lines)

@@ -37,7 +37,7 @@ namespace storm {
 auto const& cumulativeRewardFormula = formula.getSubformula().asCumulativeRewardFormula();
 STORM_LOG_THROW(!cumulativeRewardFormula.isMultiDimensional() && !cumulativeRewardFormula.getTimeBoundReference().isRewardBound(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
 } else {
-STORM_LOG_THROW(formula.getSubformula().isTotalRewardFormula(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
+STORM_LOG_THROW(formula.getSubformula().isTotalRewardFormula() || formula.getSubformula().isLongRunAverageRewardFormula(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
 }
 typename SparseMdpModelType::RewardModelType const& rewModel = model.getRewardModel(formula.asRewardOperatorFormula().getRewardModelName());

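Note: with this change, the MDP checker also accepts long-run average reward sub-formulas. Unlike the Markov automaton case, no separate state-reward vector is needed for an MDP, since a per-state reward can simply be added to the reward of every choice of that state. A small sketch of that folding, using plain containers rather than Storm's reward model interface:

```cpp
#include <cstdint>
#include <vector>

// Conceptual sketch: on an MDP, fold a per-state reward into per-choice rewards by adding it
// to every choice of the state, so no separate state-reward vector is required, even for LRA.
std::vector<double> foldStateIntoActionRewards(std::vector<double> actionRewards,
                                               std::vector<double> const& stateRewards,
                                               std::vector<uint64_t> const& rowGroupIndices) {
    // rowGroupIndices[s] .. rowGroupIndices[s+1] delimit the choices of state s.
    for (uint64_t state = 0; state + 1 < rowGroupIndices.size(); ++state) {
        for (uint64_t choice = rowGroupIndices[state]; choice < rowGroupIndices[state + 1]; ++choice) {
            actionRewards[choice] += stateRewards[state];
        }
    }
    return actionRewards;
}
```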
src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp (15 changed lines)

@@ -40,19 +40,19 @@ namespace storm {
 auto rewardAnalysis = preprocessing::SparseMultiObjectiveRewardAnalysis<SparseModelType>::analyze(preprocessorResult);
 STORM_LOG_THROW(rewardAnalysis.rewardFinitenessType != preprocessing::RewardFinitenessType::Infinite, storm::exceptions::NotSupportedException, "There is no Pareto optimal scheduler that yields finite reward for all objectives. This is not supported.");
 STORM_LOG_THROW(rewardAnalysis.totalRewardLessInfinityEStates, storm::exceptions::UnexpectedException, "The set of states with reward < infinity for some scheduler has not been computed during preprocessing.");
-STORM_LOG_THROW(preprocessorResult.containsOnlyTrivialObjectives(), storm::exceptions::NotSupportedException, "At least one objective was not reduced to an expected (total or cumulative) reward objective during preprocessing. This is not supported by the considered weight vector checker.");
+STORM_LOG_THROW(preprocessorResult.containsOnlyTrivialObjectives(), storm::exceptions::NotSupportedException, "At least one objective was not reduced to an expected (long run, total or cumulative) reward objective during preprocessing. This is not supported by the considered weight vector checker.");
 STORM_LOG_THROW(preprocessorResult.preprocessedModel->getInitialStates().getNumberOfSetBits() == 1, storm::exceptions::NotSupportedException, "The model has multiple initial states.");
 // Build a subsystem of the preprocessor result model that discards states that yield infinite reward for all schedulers.
 // We can also merge the states that will have reward zero anyway.
 storm::storage::BitVector maybeStates = rewardAnalysis.totalRewardLessInfinityEStates.get() & ~rewardAnalysis.reward0AStates;
-storm::storage::BitVector finiteRewardChoices = preprocessorResult.preprocessedModel->getTransitionMatrix().getRowFilter(rewardAnalysis.totalRewardLessInfinityEStates.get(), rewardAnalysis.totalRewardLessInfinityEStates.get());
+storm::storage::BitVector finiteTotalRewardChoices = preprocessorResult.preprocessedModel->getTransitionMatrix().getRowFilter(rewardAnalysis.totalRewardLessInfinityEStates.get(), rewardAnalysis.totalRewardLessInfinityEStates.get());
 std::set<std::string> relevantRewardModels;
 for (auto const& obj : this->objectives) {
 obj.formula->gatherReferencedRewardModels(relevantRewardModels);
 }
 storm::transformer::GoalStateMerger<SparseModelType> merger(*preprocessorResult.preprocessedModel);
-auto mergerResult = merger.mergeTargetAndSinkStates(maybeStates, rewardAnalysis.reward0AStates, storm::storage::BitVector(maybeStates.size(), false), std::vector<std::string>(relevantRewardModels.begin(), relevantRewardModels.end()), finiteRewardChoices);
+auto mergerResult = merger.mergeTargetAndSinkStates(maybeStates, rewardAnalysis.reward0AStates, storm::storage::BitVector(maybeStates.size(), false), std::vector<std::string>(relevantRewardModels.begin(), relevantRewardModels.end()), finiteTotalRewardChoices);
 // Initialize data specific for the considered model type
 initializeModelTypeSpecificData(*mergerResult.model);
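Note: the renamed finiteTotalRewardChoices vector marks, for each state that has finite total reward under some scheduler, the choices whose successors all stay within that state set. The sketch below illustrates the idea of such a row filter on a row-grouped transition structure; it uses plain containers and is not Storm's SparseMatrix interface:

```cpp
#include <cstdint>
#include <vector>

// Conceptual row filter: successors[choice] lists the successor states of a choice and
// rowGroupIndices[s] .. rowGroupIndices[s+1] delimit the choices of state s. A choice is
// kept only if its state and all of its successors lie in the 'allowed' set.
std::vector<bool> computeRowFilter(std::vector<std::vector<uint64_t>> const& successors,
                                   std::vector<uint64_t> const& rowGroupIndices,
                                   std::vector<bool> const& allowed) {
    std::vector<bool> result(successors.size(), false);
    for (uint64_t state = 0; state + 1 < rowGroupIndices.size(); ++state) {
        if (!allowed[state]) continue;
        for (uint64_t choice = rowGroupIndices[state]; choice < rowGroupIndices[state + 1]; ++choice) {
            bool keep = true;
            for (uint64_t succ : successors[choice]) {
                if (!allowed[succ]) { keep = false; break; }
            }
            result[choice] = keep;
        }
    }
    return result;
}
```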
@@ -60,10 +60,10 @@ namespace storm {
 // Initilize general data of the model
 transitionMatrix = std::move(mergerResult.model->getTransitionMatrix());
 initialState = *mergerResult.model->getInitialStates().begin();
-reward0EStates = rewardAnalysis.totalReward0EStates % maybeStates;
+totalReward0EStates = rewardAnalysis.totalReward0EStates % maybeStates;
 if (mergerResult.targetState) {
 // There is an additional state in the result
-reward0EStates.resize(reward0EStates.size() + 1, true);
+totalReward0EStates.resize(totalReward0EStates.size() + 1, true);
 // The overapproximation for the possible ec choices consists of the states that can reach the target states with prob. 0 and the target state itself.
 storm::storage::BitVector targetStateAsVector(transitionMatrix.getRowGroupCount(), false);
@@ -75,6 +75,7 @@ namespace storm {
 }
 // set data for unbounded objectives
+lraObjectives = storm::storage::BitVector(this->objectives.size(), false);
 objectivesWithNoUpperTimeBound = storm::storage::BitVector(this->objectives.size(), false);
 actionsWithoutRewardInUnboundedPhase = storm::storage::BitVector(transitionMatrix.getRowCount(), true);
 for (uint_fast64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
@@ -83,6 +84,10 @@ namespace storm {
 objectivesWithNoUpperTimeBound.set(objIndex, true);
 actionsWithoutRewardInUnboundedPhase &= storm::utility::vector::filterZero(actionRewards[objIndex]);
 }
+if (formula.getSubformula().isLongRunAverageRewardFormula()) {
+lraObjectives.set(objIndex, true);
+objectivesWithNoUpperTimeBound.set(objIndex, true);
+}
 }
 // initialize data for the results

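Note: the new lraObjectives bit vector records which objectives are long-run average objectives; they are also marked as having no upper time bound, since their value is computed in the unbounded phase. A hedged sketch of how such a classification could be consumed when preparing the weighted unbounded-phase computation (illustrative only, not the checker's actual code):

```cpp
#include <cstdint>
#include <vector>

// Conceptual split for the unbounded phase: fold the non-LRA unbounded objectives into one
// weighted action-reward vector and collect the LRA objectives for a dedicated LRA computation.
struct UnboundedPhaseInput {
    std::vector<double> weightedActionRewards;  // sum of w_i * actionRewards_i over non-LRA objectives
    std::vector<uint64_t> lraObjectiveIndices;  // objectives that need a long-run average computation
};

UnboundedPhaseInput prepareUnboundedPhase(std::vector<std::vector<double>> const& actionRewards,
                                          std::vector<bool> const& lraObjectives,
                                          std::vector<bool> const& noUpperTimeBound,
                                          std::vector<double> const& weights,
                                          uint64_t numChoices) {
    UnboundedPhaseInput result;
    result.weightedActionRewards.assign(numChoices, 0.0);
    for (uint64_t objIndex = 0; objIndex < weights.size(); ++objIndex) {
        if (lraObjectives[objIndex]) {
            result.lraObjectiveIndices.push_back(objIndex);
        } else if (noUpperTimeBound[objIndex]) {
            for (uint64_t choice = 0; choice < numChoices; ++choice) {
                result.weightedActionRewards[choice] += weights[objIndex] * actionRewards[objIndex][choice];
            }
        }
    }
    return result;
}
```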
src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h (11 changed lines)

@@ -117,11 +117,16 @@ namespace storm {
 storm::storage::BitVector ecChoicesHint;
 // The actions that have reward assigned for at least one objective without upper timeBound
 storm::storage::BitVector actionsWithoutRewardInUnboundedPhase;
-// The states for which there is a scheduler yielding reward 0 for each objective
-storm::storage::BitVector reward0EStates;
+// The states for which there is a scheduler yielding reward 0 for each total reward objective
+storm::storage::BitVector totalReward0EStates;
 // stores the state action rewards for each objective.
 std::vector<std::vector<ValueType>> actionRewards;
+// stores the state rewards for each objective.
+// These are only relevant for LRA objectives for MAs (otherwise, they appear within the action rewards). For other objectives/models, the corresponding vector will be empty.
+std::vector<std::vector<ValueType>> stateRewards;
+// stores the indices of the objectives for which we need to compute the long run average values
+storm::storage::BitVector lraObjectives;
 // stores the indices of the objectives for which there is no upper time bound
 storm::storage::BitVector objectivesWithNoUpperTimeBound;
@@ -137,7 +142,7 @@ namespace storm {
 std::vector<ValueType> offsetsToUnderApproximation;
 std::vector<ValueType> offsetsToOverApproximation;
 // The scheduler choices that optimize the weighted rewards of undounded objectives.
-std::vector<uint_fast64_t> optimalChoices;
+std::vector<uint64_t> optimalChoices;
 struct EcQuotient {
 storm::storage::SparseMatrix<ValueType> matrix;

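Note: the new stateRewards and lraObjectives members feed the LRA part of the weighted-sum computation. For a Markov automaton, that computation needs a weighted state-reward rate in addition to the weighted action rewards; a minimal sketch of the state-reward part, with illustrative names:

```cpp
#include <cstdint>
#include <vector>

// Conceptual sketch: combine the separately stored state-reward rates of the LRA objectives
// into a single weighted rate for one state (empty vectors mean "no state rewards").
double weightedStateRewardRate(std::vector<std::vector<double>> const& stateRewards,
                               std::vector<double> const& weights,
                               std::vector<uint64_t> const& lraObjectiveIndices, uint64_t state) {
    double result = 0.0;
    for (uint64_t objIndex : lraObjectiveIndices) {
        if (!stateRewards[objIndex].empty()) {
            result += weights[objIndex] * stateRewards[objIndex][state];
        }
    }
    return result;
}
```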