From 29a7a7e865dc80a8a515743487a8ac5a91982e22 Mon Sep 17 00:00:00 2001
From: Tim Quatmann
Date: Thu, 24 Sep 2020 14:57:13 +0200
Subject: [PATCH] WeightVectorChecker: Making initialization LRA-ready

---
 .../pcaa/StandardMaPcaaWeightVectorChecker.cpp   |  8 +++++++-
 .../pcaa/StandardMdpPcaaWeightVectorChecker.cpp  |  2 +-
 .../pcaa/StandardPcaaWeightVectorChecker.cpp     | 15 ++++++++++-----
 .../pcaa/StandardPcaaWeightVectorChecker.h       | 11 ++++++++---
 4 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/storm/modelchecker/multiobjective/pcaa/StandardMaPcaaWeightVectorChecker.cpp b/src/storm/modelchecker/multiobjective/pcaa/StandardMaPcaaWeightVectorChecker.cpp
index 72052768a..e1bad3580 100644
--- a/src/storm/modelchecker/multiobjective/pcaa/StandardMaPcaaWeightVectorChecker.cpp
+++ b/src/storm/modelchecker/multiobjective/pcaa/StandardMaPcaaWeightVectorChecker.cpp
@@ -35,7 +35,8 @@ namespace storm {
                 exitRates = model.getExitRates();
 
                 // Set the (discretized) state action rewards.
-                this->actionRewards.resize(this->objectives.size());
+                this->actionRewards.assign(this->objectives.size(), {});
+                this->stateRewards.assign(this->objectives.size(), {});
                 for (uint64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
                     auto const& formula = *this->objectives[objIndex].formula;
                     STORM_LOG_THROW(formula.isRewardOperatorFormula() && formula.asRewardOperatorFormula().hasRewardModelName(), storm::exceptions::UnexpectedException, "Unexpected type of operator formula: " << formula);
@@ -49,6 +50,11 @@
                             this->actionRewards[objIndex][model.getTransitionMatrix().getRowGroupIndices()[markovianState]] += rewModel.getStateReward(markovianState) / exitRates[markovianState];
                         }
                     }
+                } else if (formula.getSubformula().isLongRunAverageRewardFormula()) {
+                    // The LRA methods for MA require keeping track of state- and action rewards separately
+                    if (rewModel.hasStateRewards()) {
+                        this->stateRewards[objIndex] = rewModel.getStateRewardVector();
+                    }
                 } else {
                     STORM_LOG_THROW(formula.getSubformula().isCumulativeRewardFormula() && formula.getSubformula().asCumulativeRewardFormula().getTimeBoundReference().isTimeBound(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
                     STORM_LOG_THROW(!rewModel.hasStateRewards(), storm::exceptions::InvalidPropertyException, "Found state rewards for time bounded objective " << this->objectives[objIndex].originalFormula << ". This is not supported.");
diff --git a/src/storm/modelchecker/multiobjective/pcaa/StandardMdpPcaaWeightVectorChecker.cpp b/src/storm/modelchecker/multiobjective/pcaa/StandardMdpPcaaWeightVectorChecker.cpp
index 96a7e1de6..09abb8590 100644
--- a/src/storm/modelchecker/multiobjective/pcaa/StandardMdpPcaaWeightVectorChecker.cpp
+++ b/src/storm/modelchecker/multiobjective/pcaa/StandardMdpPcaaWeightVectorChecker.cpp
@@ -37,7 +37,7 @@ namespace storm {
                     auto const& cumulativeRewardFormula = formula.getSubformula().asCumulativeRewardFormula();
                     STORM_LOG_THROW(!cumulativeRewardFormula.isMultiDimensional() && !cumulativeRewardFormula.getTimeBoundReference().isRewardBound(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
                 } else {
-                    STORM_LOG_THROW(formula.getSubformula().isTotalRewardFormula(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
+                    STORM_LOG_THROW(formula.getSubformula().isTotalRewardFormula() || formula.getSubformula().isLongRunAverageRewardFormula(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
                 }
 
                 typename SparseMdpModelType::RewardModelType const& rewModel = model.getRewardModel(formula.asRewardOperatorFormula().getRewardModelName());
diff --git a/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp b/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp
index cc50d3dc3..39d21d845 100644
--- a/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp
+++ b/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp
@@ -40,19 +40,19 @@ namespace storm {
                 auto rewardAnalysis = preprocessing::SparseMultiObjectiveRewardAnalysis<SparseModelType>::analyze(preprocessorResult);
                 STORM_LOG_THROW(rewardAnalysis.rewardFinitenessType != preprocessing::RewardFinitenessType::Infinite, storm::exceptions::NotSupportedException, "There is no Pareto optimal scheduler that yields finite reward for all objectives. This is not supported.");
                 STORM_LOG_THROW(rewardAnalysis.totalRewardLessInfinityEStates, storm::exceptions::UnexpectedException, "The set of states with reward < infinity for some scheduler has not been computed during preprocessing.");
-                STORM_LOG_THROW(preprocessorResult.containsOnlyTrivialObjectives(), storm::exceptions::NotSupportedException, "At least one objective was not reduced to an expected (total or cumulative) reward objective during preprocessing. This is not supported by the considered weight vector checker.");
+                STORM_LOG_THROW(preprocessorResult.containsOnlyTrivialObjectives(), storm::exceptions::NotSupportedException, "At least one objective was not reduced to an expected (long run, total or cumulative) reward objective during preprocessing. This is not supported by the considered weight vector checker.");
                 STORM_LOG_THROW(preprocessorResult.preprocessedModel->getInitialStates().getNumberOfSetBits() == 1, storm::exceptions::NotSupportedException, "The model has multiple initial states.");
 
                 // Build a subsystem of the preprocessor result model that discards states that yield infinite reward for all schedulers.
                 // We can also merge the states that will have reward zero anyway.
                 storm::storage::BitVector maybeStates = rewardAnalysis.totalRewardLessInfinityEStates.get() & ~rewardAnalysis.reward0AStates;
-                storm::storage::BitVector finiteRewardChoices = preprocessorResult.preprocessedModel->getTransitionMatrix().getRowFilter(rewardAnalysis.totalRewardLessInfinityEStates.get(), rewardAnalysis.totalRewardLessInfinityEStates.get());
+                storm::storage::BitVector finiteTotalRewardChoices = preprocessorResult.preprocessedModel->getTransitionMatrix().getRowFilter(rewardAnalysis.totalRewardLessInfinityEStates.get(), rewardAnalysis.totalRewardLessInfinityEStates.get());
                 std::set<std::string> relevantRewardModels;
                 for (auto const& obj : this->objectives) {
                     obj.formula->gatherReferencedRewardModels(relevantRewardModels);
                 }
                 storm::transformer::GoalStateMerger<SparseModelType> merger(*preprocessorResult.preprocessedModel);
-                auto mergerResult = merger.mergeTargetAndSinkStates(maybeStates, rewardAnalysis.reward0AStates, storm::storage::BitVector(maybeStates.size(), false), std::vector<std::string>(relevantRewardModels.begin(), relevantRewardModels.end()), finiteRewardChoices);
+                auto mergerResult = merger.mergeTargetAndSinkStates(maybeStates, rewardAnalysis.reward0AStates, storm::storage::BitVector(maybeStates.size(), false), std::vector<std::string>(relevantRewardModels.begin(), relevantRewardModels.end()), finiteTotalRewardChoices);
 
                 // Initialize data specific for the considered model type
                 initializeModelTypeSpecificData(*mergerResult.model);
@@ -60,10 +60,10 @@
                 // Initilize general data of the model
                 transitionMatrix = std::move(mergerResult.model->getTransitionMatrix());
                 initialState = *mergerResult.model->getInitialStates().begin();
-                reward0EStates = rewardAnalysis.totalReward0EStates % maybeStates;
+                totalReward0EStates = rewardAnalysis.totalReward0EStates % maybeStates;
                 if (mergerResult.targetState) {
                     // There is an additional state in the result
-                    reward0EStates.resize(reward0EStates.size() + 1, true);
+                    totalReward0EStates.resize(totalReward0EStates.size() + 1, true);
 
                     // The overapproximation for the possible ec choices consists of the states that can reach the target states with prob. 0 and the target state itself.
                     storm::storage::BitVector targetStateAsVector(transitionMatrix.getRowGroupCount(), false);
@@ -75,6 +75,7 @@
                 }
 
                 // set data for unbounded objectives
+                lraObjectives = storm::storage::BitVector(this->objectives.size(), false);
                 objectivesWithNoUpperTimeBound = storm::storage::BitVector(this->objectives.size(), false);
                 actionsWithoutRewardInUnboundedPhase = storm::storage::BitVector(transitionMatrix.getRowCount(), true);
                 for (uint_fast64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
@@ -83,6 +84,10 @@
                         objectivesWithNoUpperTimeBound.set(objIndex, true);
                         actionsWithoutRewardInUnboundedPhase &= storm::utility::vector::filterZero(actionRewards[objIndex]);
                     }
+                    if (formula.getSubformula().isLongRunAverageRewardFormula()) {
+                        lraObjectives.set(objIndex, true);
+                        objectivesWithNoUpperTimeBound.set(objIndex, true);
+                    }
                 }
 
                 // initialize data for the results
diff --git a/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h b/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h
index 3a6ac3c9f..a9fd3a014 100644
--- a/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h
+++ b/src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h
@@ -117,11 +117,16 @@ namespace storm {
             storm::storage::BitVector ecChoicesHint;
             // The actions that have reward assigned for at least one objective without upper timeBound
             storm::storage::BitVector actionsWithoutRewardInUnboundedPhase;
-            // The states for which there is a scheduler yielding reward 0 for each objective
-            storm::storage::BitVector reward0EStates;
+            // The states for which there is a scheduler yielding reward 0 for each total reward objective
+            storm::storage::BitVector totalReward0EStates;
 
             // stores the state action rewards for each objective.
             std::vector<std::vector<ValueType>> actionRewards;
+            // stores the state rewards for each objective.
+            // These are only relevant for LRA objectives for MAs (otherwise, they appear within the action rewards). For other objectives/models, the corresponding vector will be empty.
+            std::vector<std::vector<ValueType>> stateRewards;
+            // stores the indices of the objectives for which we need to compute the long run average values
+            storm::storage::BitVector lraObjectives;
             // stores the indices of the objectives for which there is no upper time bound
             storm::storage::BitVector objectivesWithNoUpperTimeBound;
 
@@ -137,7 +142,7 @@
            std::vector<ValueType> offsetsToUnderApproximation;
            std::vector<ValueType> offsetsToOverApproximation;
            // The scheduler choices that optimize the weighted rewards of undounded objectives.
-           std::vector<uint64_t> optimalChoices;
+           std::vector<uint64_t> optimalChoices;
 
            struct EcQuotient {
                storm::storage::SparseMatrix<ValueType> matrix;
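
The bookkeeping this patch introduces can be summarized independently of Storm's class hierarchy: LRA objectives are flagged in their own bit vector, additionally count as objectives without an upper time bound, and (for Markov automata) keep their state rewards separate from the action rewards. The following is a minimal self-contained sketch of that pattern; SubformulaKind, CheckerData, initializeFlags, and the std::vector<bool> stand-in for storm::storage::BitVector are illustrative assumptions, not Storm's API.

    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for the preprocessed objective formulas.
    enum class SubformulaKind { TotalReward, CumulativeReward, LongRunAverageReward };

    struct CheckerData {
        std::vector<bool> lraObjectives;                  // stands in for storm::storage::BitVector
        std::vector<bool> objectivesWithNoUpperTimeBound; // stands in for storm::storage::BitVector
        std::vector<std::vector<double>> actionRewards;   // one reward vector per objective
        std::vector<std::vector<double>> stateRewards;    // per objective; empty unless LRA on an MA
    };

    // Mirrors the flag initialization above: total reward and LRA objectives both
    // belong to the unbounded phase, but only LRA objectives are flagged for a
    // long-run average computation (and separate state-reward handling on MAs).
    inline CheckerData initializeFlags(std::vector<SubformulaKind> const& objectives, uint64_t numChoices) {
        CheckerData data;
        data.lraObjectives.assign(objectives.size(), false);
        data.objectivesWithNoUpperTimeBound.assign(objectives.size(), false);
        data.actionRewards.assign(objectives.size(), std::vector<double>(numChoices, 0.0));
        data.stateRewards.assign(objectives.size(), {});  // stays empty for non-LRA objectives
        for (uint64_t objIndex = 0; objIndex < objectives.size(); ++objIndex) {
            if (objectives[objIndex] == SubformulaKind::TotalReward) {
                data.objectivesWithNoUpperTimeBound[objIndex] = true;
            }
            if (objectives[objIndex] == SubformulaKind::LongRunAverageReward) {
                data.lraObjectives[objIndex] = true;
                data.objectivesWithNoUpperTimeBound[objIndex] = true;
            }
        }
        return data;
    }

Keeping lraObjectives separate from objectivesWithNoUpperTimeBound lets the weighted-sum phase treat all unbounded objectives uniformly, while LRA-specific steps (and the stateRewards vectors on Markov automata) are only touched where the corresponding bit is set.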