From 3215f255137df0403b57fcddecd722e9c7f6ceef Mon Sep 17 00:00:00 2001
From: TimQu <tim.quatmann@cs.rwth-aachen.de>
Date: Thu, 19 Oct 2017 21:54:43 +0200
Subject: [PATCH] upper result bounds for cumulative reward formulas to enable
 interval iteration

---
 src/storm/logic/CumulativeRewardFormula.cpp    |   9 ++
 .../SparseMultiObjectivePreprocessor.cpp       | 127 +++++++++++++-----
 2 files changed, 101 insertions(+), 35 deletions(-)

diff --git a/src/storm/logic/CumulativeRewardFormula.cpp b/src/storm/logic/CumulativeRewardFormula.cpp
index a78c9b274..cae9590a6 100644
--- a/src/storm/logic/CumulativeRewardFormula.cpp
+++ b/src/storm/logic/CumulativeRewardFormula.cpp
@@ -3,6 +3,7 @@
 
 #include "storm/logic/FormulaVisitor.h"
 #include "storm/utility/macros.h"
+#include "storm/utility/constants.h"
 #include "storm/exceptions/InvalidPropertyException.h"
 #include "storm/exceptions/InvalidOperationException.h"
 
@@ -70,6 +71,14 @@ namespace storm {
             return value;
         }
         
+        template <>
+        storm::RationalNumber CumulativeRewardFormula::getBound() const {
+            checkNoVariablesInBound(bound.getBound());
+            storm::RationalNumber value = bound.getBound().evaluateAsRational();
+            STORM_LOG_THROW(value >= storm::utility::zero<storm::RationalNumber>(), storm::exceptions::InvalidPropertyException, "Time-bound must not evaluate to negative number.");
+            return value;
+        }
+        
         template <>
         uint64_t CumulativeRewardFormula::getBound() const {
             checkNoVariablesInBound(bound.getBound());
diff --git a/src/storm/modelchecker/multiobjective/SparseMultiObjectivePreprocessor.cpp b/src/storm/modelchecker/multiobjective/SparseMultiObjectivePreprocessor.cpp
index df8e5ae4d..9656f28f6 100644
--- a/src/storm/modelchecker/multiobjective/SparseMultiObjectivePreprocessor.cpp
+++ b/src/storm/modelchecker/multiobjective/SparseMultiObjectivePreprocessor.cpp
@@ -518,54 +518,111 @@ namespace storm {
 
             template<typename SparseModelType>
             boost::optional<typename SparseModelType::ValueType> SparseMultiObjectivePreprocessor<SparseModelType>::computeUpperResultBound(ReturnType const& result, uint64_t objIndex, storm::storage::SparseMatrix<ValueType> const& backwardTransitions) {
+                boost::optional<ValueType> upperBound;
 
-                if (result.objectives[objIndex].formula->isRewardOperatorFormula() && (result.objectives[objIndex].formula->getSubformula().isTotalRewardFormula() || result.objectives[objIndex].formula->getSubformula().isCumulativeRewardFormula())) {
-                    // TODO: Consider cumulative reward formulas less naively
-                    // TODO: We have to eliminate ECs here to treat zero-reward ECs
-                    auto const& transitions = result.preprocessedModel->getTransitionMatrix();
+                if (!result.originalModel.isOfType(storm::models::ModelType::Mdp)) {
+                    return upperBound;
+                }
+                
+                auto const& transitions = result.preprocessedModel->getTransitionMatrix();
+                
+                if (result.objectives[objIndex].formula->isRewardOperatorFormula()) {
                     auto const& rewModel = result.preprocessedModel->getRewardModel(result.objectives[objIndex].formula->asRewardOperatorFormula().getRewardModelName());
-                    storm::storage::BitVector allStates(result.preprocessedModel->getNumberOfStates(), true);
-                    
-                    // Get the set of states from which no reward is reachable
-                    auto nonZeroRewardStates = rewModel.getStatesWithZeroReward(transitions);
-                    nonZeroRewardStates.complement();
-                    auto expRewGreater0EStates = storm::utility::graph::performProbGreater0E(backwardTransitions, allStates, nonZeroRewardStates);
+                    auto actionRewards = rewModel.getTotalRewardVector(transitions);
 
-                    auto zeroRewardChoices = rewModel.getChoicesWithZeroReward(transitions);
-                    
-                    auto ecElimRes = storm::transformer::EndComponentEliminator<ValueType>::transform(transitions, expRewGreater0EStates, zeroRewardChoices, ~allStates);
-                    
-                    allStates.resize(ecElimRes.matrix.getRowGroupCount());
-                    storm::storage::BitVector outStates(allStates.size(), false);
-                    std::vector<ValueType> rew0StateProbs;
-                    rew0StateProbs.reserve(ecElimRes.matrix.getRowCount());
-                    for (uint64_t state = 0; state < allStates.size(); ++ state) {
-                        for (uint64_t choice = ecElimRes.matrix.getRowGroupIndices()[state]; choice < ecElimRes.matrix.getRowGroupIndices()[state + 1]; ++choice) {
-                            ValueType outProb = storm::utility::one<ValueType>() - ecElimRes.matrix.getRowSum(choice);
-                            if (!storm::utility::isZero(outProb)) {
-                                outStates.set(state, true);
+                    // TODO: Consider cumulative reward formulas less naively
+                    if (result.objectives[objIndex].formula->getSubformula().isCumulativeRewardFormula()) {
+                        auto const& cumulativeRewardFormula = result.objectives[objIndex].formula->getSubformula().asCumulativeRewardFormula();
+                        ValueType rewardBound = cumulativeRewardFormula.template getBound<ValueType>();
+                        if (cumulativeRewardFormula.getTimeBoundReference().isRewardBound()) {
+                            auto const& costModel = result.preprocessedModel->getRewardModel(cumulativeRewardFormula.getTimeBoundReference().getRewardName());
+                            if (!costModel.hasTransitionRewards()) {
+                                auto actionCosts = costModel.getTotalRewardVector(transitions);
+                                typename SparseModelType::ValueType largestRewardPerCost = storm::utility::zero<ValueType>();
+                                bool isFinite = true;
+                                for (auto rewIt = actionRewards.begin(), costIt = actionCosts.begin(); rewIt != actionRewards.end(); ++rewIt, ++costIt) {
+                                    if (!storm::utility::isZero(*rewIt)) {
+                                        if (storm::utility::isZero(*costIt)) {
+                                            isFinite = false;
+                                            break;
+                                        }
+                                        ValueType rewardPerCost = *rewIt / *costIt;
+                                        largestRewardPerCost = std::max(largestRewardPerCost, rewardPerCost);
+                                    }
+                                }
+                                
+                                if (isFinite) {
+                                    ValueType newResultBound = largestRewardPerCost * rewardBound;
+                                    if (upperBound) {
+                                        upperBound = std::min(upperBound.get(), newResultBound);
+                                    } else {
+                                        upperBound = newResultBound;
+                                    }
+                                }
+                            }
+                        } else {
+                            ValueType newResultBound = (*std::max_element(actionRewards.begin(), actionRewards.end())) * rewardBound;
+                            if (upperBound) {
+                                upperBound = std::min(upperBound.get(), newResultBound);
+                            } else {
+                                upperBound = newResultBound;
                             }
-                            rew0StateProbs.push_back(outProb);
                         }
                     }
                     
-                    // An upper reward bound can only be computed if it is below infinity
-                    if (storm::utility::graph::performProb1A(ecElimRes.matrix, ecElimRes.matrix.getRowGroupIndices(), ecElimRes.matrix.transpose(true), allStates, outStates).full()) {
+                    if (result.objectives[objIndex].formula->getSubformula().isTotalRewardFormula() || result.objectives[objIndex].formula->getSubformula().isCumulativeRewardFormula()) {
+                        // We have to eliminate ECs here to treat zero-reward ECs
+                        
+                        storm::storage::BitVector allStates(result.preprocessedModel->getNumberOfStates(), true);
+                        
+                        // Get the set of states from which no reward is reachable
+                        auto nonZeroRewardStates = rewModel.getStatesWithZeroReward(transitions);
+                        nonZeroRewardStates.complement();
+                        auto expRewGreater0EStates = storm::utility::graph::performProbGreater0E(backwardTransitions, allStates, nonZeroRewardStates);
+                        
+                        auto zeroRewardChoices = rewModel.getChoicesWithZeroReward(transitions);
 
-                        auto actionRewards = rewModel.getTotalRewardVector(transitions);
-                        std::vector<ValueType> rewards;
-                        rewards.reserve(ecElimRes.matrix.getRowCount());
-                        for (auto row : ecElimRes.newToOldRowMapping) {
-                            rewards.push_back(actionRewards[row]);
+                        auto ecElimRes = storm::transformer::EndComponentEliminator<ValueType>::transform(transitions, expRewGreater0EStates, zeroRewardChoices, ~allStates);
+                        
+                        allStates.resize(ecElimRes.matrix.getRowGroupCount());
+                        storm::storage::BitVector outStates(allStates.size(), false);
+                        std::vector<ValueType> rew0StateProbs;
+                        rew0StateProbs.reserve(ecElimRes.matrix.getRowCount());
+                        for (uint64_t state = 0; state < allStates.size(); ++ state) {
+                            for (uint64_t choice = ecElimRes.matrix.getRowGroupIndices()[state]; choice < ecElimRes.matrix.getRowGroupIndices()[state + 1]; ++choice) {
+                                ValueType outProb = storm::utility::one<ValueType>() - ecElimRes.matrix.getRowSum(choice);
+                                if (!storm::utility::isZero(outProb)) {
+                                    outStates.set(state, true);
+                                }
+                                rew0StateProbs.push_back(outProb);
+                            }
                         }
 
-                        storm::modelchecker::helper::BaierUpperRewardBoundsComputer<ValueType> baier(ecElimRes.matrix, rewards, rew0StateProbs);
-                        return baier.computeUpperBound();
+                        // An upper reward bound can only be computed if it is below infinity
+                        if (storm::utility::graph::performProb1A(ecElimRes.matrix, ecElimRes.matrix.getRowGroupIndices(), ecElimRes.matrix.transpose(true), allStates, outStates).full()) {
+                            
+                            std::vector<ValueType> rewards;
+                            rewards.reserve(ecElimRes.matrix.getRowCount());
+                            for (auto row : ecElimRes.newToOldRowMapping) {
+                                rewards.push_back(actionRewards[row]);
+                            }
+                            
+                            storm::modelchecker::helper::BaierUpperRewardBoundsComputer<ValueType> baier(ecElimRes.matrix, rewards, rew0StateProbs);
+                            if (upperBound) {
+                                upperBound = std::min(upperBound.get(), baier.computeUpperBound());
+                            } else {
+                                upperBound = baier.computeUpperBound();
+                            }
+                        }
                     }
                 }
-                // reaching this point means that we were not able to compute an upper bound
-                return boost::none;
+                if (upperBound) {
+                    STORM_LOG_INFO("Computed upper result bound " << upperBound.get() << " for objective " << *result.objectives[objIndex].formula << ".");
+                } else {
+                    STORM_LOG_WARN("Could not compute upper result bound for objective " << *result.objectives[objIndex].formula);
+                }
+                return upperBound;
             }
 
 
 