Browse Source

Add upper result bounds for cumulative reward formulas to enable interval iteration

tempestpy_adaptions
TimQu 7 years ago
parent
commit
3215f25513
  1. 9
      src/storm/logic/CumulativeRewardFormula.cpp
  2. 71
      src/storm/modelchecker/multiobjective/SparseMultiObjectivePreprocessor.cpp

9
src/storm/logic/CumulativeRewardFormula.cpp

@ -3,6 +3,7 @@
#include "storm/logic/FormulaVisitor.h"
#include "storm/utility/macros.h"
#include "storm/utility/constants.h"
#include "storm/exceptions/InvalidPropertyException.h"
#include "storm/exceptions/InvalidOperationException.h"
@ -70,6 +71,14 @@ namespace storm {
return value;
}
template <>
storm::RationalNumber CumulativeRewardFormula::getBound() const {
checkNoVariablesInBound(bound.getBound());
storm::RationalNumber value = bound.getBound().evaluateAsRational();
STORM_LOG_THROW(value >= storm::utility::zero<storm::RationalNumber>(), storm::exceptions::InvalidPropertyException, "Time-bound must not evaluate to negative number.");
return value;
}
template <>
uint64_t CumulativeRewardFormula::getBound() const {
checkNoVariablesInBound(bound.getBound());

71
src/storm/modelchecker/multiobjective/SparseMultiObjectivePreprocessor.cpp

@ -518,12 +518,61 @@ namespace storm {
template<typename SparseModelType>
boost::optional<typename SparseModelType::ValueType> SparseMultiObjectivePreprocessor<SparseModelType>::computeUpperResultBound(ReturnType const& result, uint64_t objIndex, storm::storage::SparseMatrix<ValueType> const& backwardTransitions) {
boost::optional<ValueType> upperBound;
if (!result.originalModel.isOfType(storm::models::ModelType::Mdp)) {
return upperBound;
}
if (result.objectives[objIndex].formula->isRewardOperatorFormula() && (result.objectives[objIndex].formula->getSubformula().isTotalRewardFormula() || result.objectives[objIndex].formula->getSubformula().isCumulativeRewardFormula())) {
// TODO: Consider cumulative reward formulas less naively
// TODO: We have to eliminate ECs here to treat zero-reward ECs
auto const& transitions = result.preprocessedModel->getTransitionMatrix();
if (result.objectives[objIndex].formula->isRewardOperatorFormula()) {
auto const& rewModel = result.preprocessedModel->getRewardModel(result.objectives[objIndex].formula->asRewardOperatorFormula().getRewardModelName());
auto actionRewards = rewModel.getTotalRewardVector(transitions);
// TODO: Consider cumulative reward formulas less naively
if (result.objectives[objIndex].formula->getSubformula().isCumulativeRewardFormula()) {
auto const& cumulativeRewardFormula = result.objectives[objIndex].formula->getSubformula().asCumulativeRewardFormula();
ValueType rewardBound = cumulativeRewardFormula.template getBound<ValueType>();
if (cumulativeRewardFormula.getTimeBoundReference().isRewardBound()) {
auto const& costModel = result.preprocessedModel->getRewardModel(cumulativeRewardFormula.getTimeBoundReference().getRewardName());
if (!costModel.hasTransitionRewards()) {
auto actionCosts = costModel.getTotalRewardVector(transitions);
typename SparseModelType::ValueType largestRewardPerCost = storm::utility::zero<typename SparseModelType::ValueType>();
bool isFinite = true;
for (auto rewIt = actionRewards.begin(), costIt = actionCosts.begin(); rewIt != actionRewards.end(); ++rewIt, ++costIt) {
if (!storm::utility::isZero(*rewIt)) {
if (storm::utility::isZero(*costIt)) {
isFinite = false;
break;
}
ValueType rewardPerCost = *rewIt / *costIt;
largestRewardPerCost = std::max(largestRewardPerCost, rewardPerCost);
}
}
if (isFinite) {
ValueType newResultBound = largestRewardPerCost * rewardBound;
if (upperBound) {
upperBound = std::min(upperBound.get(), newResultBound);
} else {
upperBound = newResultBound;
}
}
}
} else {
ValueType newResultBound = (*std::max_element(actionRewards.begin(), actionRewards.end())) * rewardBound;
if (upperBound) {
upperBound = std::min(upperBound.get(), newResultBound);
} else {
upperBound = newResultBound;
}
}
}
if (result.objectives[objIndex].formula->getSubformula().isTotalRewardFormula() || result.objectives[objIndex].formula->getSubformula().isCumulativeRewardFormula()) {
// We have to eliminate ECs here to treat zero-reward ECs
storm::storage::BitVector allStates(result.preprocessedModel->getNumberOfStates(), true);
// Get the set of states from which no reward is reachable
@ -552,7 +601,6 @@ namespace storm {
// An upper reward bound can only be computed if it is below infinity
if (storm::utility::graph::performProb1A(ecElimRes.matrix, ecElimRes.matrix.getRowGroupIndices(), ecElimRes.matrix.transpose(true), allStates, outStates).full()) {
auto actionRewards = rewModel.getTotalRewardVector(transitions);
std::vector<ValueType> rewards;
rewards.reserve(ecElimRes.matrix.getRowCount());
for (auto row : ecElimRes.newToOldRowMapping) {
@ -560,12 +608,21 @@ namespace storm {
}
storm::modelchecker::helper::BaierUpperRewardBoundsComputer<ValueType> baier(ecElimRes.matrix, rewards, rew0StateProbs);
return baier.computeUpperBound();
if (upperBound) {
upperBound = std::min(upperBound.get(), baier.computeUpperBound());
} else {
upperBound = baier.computeUpperBound();
}
}
}
}
// reaching this point means that we were not able to compute an upper bound
return boost::none;
if (upperBound) {
STORM_LOG_INFO("Computed upper result bound " << upperBound.get() << " for objective " << *result.objectives[objIndex].formula << ".");
} else {
STORM_LOG_WARN("Could not compute upper result bound for objective " << *result.objectives[objIndex].formula);
}
return upperBound;
}

Loading…
Cancel
Save