From ce5ca9d1ced4ecfdc5b77ce00ecc05c914c81164 Mon Sep 17 00:00:00 2001
From: dehnert <dehnert@cs.rwth-aachen.de>
Date: Tue, 13 Sep 2016 11:44:08 +0200
Subject: [PATCH] added proper action reward handling to JANI next-state
 generator

Former-commit-id: cd554d6e127faf12c368dea1e5e2a2999bba5f51 [formerly 47dfb5a7967fb88c345851604e8a8974f0b1e09e]
Former-commit-id: 67a31637c5b2d1c36afec11b017bd46a2794b38c
---
 src/builder/ExplicitModelBuilder.cpp      |  4 +--
 src/generator/Choice.cpp                  | 35 +++++++++++++----------
 src/generator/Choice.h                    | 19 +++++++-----
 src/generator/JaniNextStateGenerator.cpp  | 33 ++++++++++++++++++---
 src/generator/JaniNextStateGenerator.h    |  3 ++
 src/generator/PrismNextStateGenerator.cpp | 12 ++++----
 6 files changed, 72 insertions(+), 34 deletions(-)
diff --git a/src/builder/ExplicitModelBuilder.cpp b/src/builder/ExplicitModelBuilder.cpp
index 5faf386e5..8d2164f9f 100644
--- a/src/builder/ExplicitModelBuilder.cpp
+++ b/src/builder/ExplicitModelBuilder.cpp
@@ -274,7 +274,7 @@ namespace storm {
                     for (auto const& choice : behavior) {
                         // Add command labels if requested.
                         if (generator->getOptions().isBuildChoiceLabelsSet()) {
-                            choiceLabels.get().push_back(choice.getChoiceLabels());
+                            choiceLabels.get().push_back(choice.getLabels());
                         }
                         
                         // If we keep track of the Markovian choices, store whether the current one is Markovian.
@@ -289,7 +289,7 @@ namespace storm {
                         }
                         
                         // Add the rewards to the reward models.
-                        auto choiceRewardIt = choice.getChoiceRewards().begin();
+                        auto choiceRewardIt = choice.getRewards().begin();
                         for (auto& rewardModelBuilder : rewardModelBuilders) {
                             if (rewardModelBuilder.hasStateActionRewards()) {
                                 rewardModelBuilder.addStateActionReward(*choiceRewardIt);
diff --git a/src/generator/Choice.cpp b/src/generator/Choice.cpp
index 7f91a4b4d..4337fffd4 100644
--- a/src/generator/Choice.cpp
+++ b/src/generator/Choice.cpp
@@ -8,7 +8,7 @@ namespace storm {
     namespace generator {
         
         template<typename ValueType, typename StateType>
-        Choice<ValueType, StateType>::Choice(uint_fast64_t actionIndex, bool markovian) : markovian(markovian), actionIndex(actionIndex), distribution(), totalMass(storm::utility::zero<ValueType>()), choiceRewards() {
+        Choice<ValueType, StateType>::Choice(uint_fast64_t actionIndex, bool markovian) : markovian(markovian), actionIndex(actionIndex), distribution(), totalMass(storm::utility::zero<ValueType>()), rewards(), labels() {
             // Intentionally left empty.
         }
         
@@ -33,24 +33,24 @@ namespace storm {
         }
         
         template<typename ValueType, typename StateType>
-        void Choice<ValueType, StateType>::addChoiceLabel(uint_fast64_t label) {
-            if (!choiceLabels) {
-                choiceLabels = LabelSet();
+        void Choice<ValueType, StateType>::addLabel(uint_fast64_t label) {
+            if (!labels) {
+                labels = LabelSet();
             }
-            choiceLabels->insert(label);
+            labels->insert(label);
         }
         
         template<typename ValueType, typename StateType>
-        void Choice<ValueType, StateType>::addChoiceLabels(LabelSet const& labelSet) {
-            if (!choiceLabels) {
-                choiceLabels = LabelSet();
+        void Choice<ValueType, StateType>::addLabels(LabelSet const& labelSet) {
+            if (!labels) {
+                labels = LabelSet();
             }
-            choiceLabels->insert(labelSet.begin(), labelSet.end());
+            labels->insert(labelSet.begin(), labelSet.end());
         }
         
         template<typename ValueType, typename StateType>
-        boost::container::flat_set<uint_fast64_t> const& Choice<ValueType, StateType>::getChoiceLabels() const {
-            return *choiceLabels;
+        boost::container::flat_set<uint_fast64_t> const& Choice<ValueType, StateType>::getLabels() const {
+            return *labels;
         }
         
         template<typename ValueType, typename StateType>
@@ -70,13 +70,18 @@ namespace storm {
         }
         
         template<typename ValueType, typename StateType>
-        void Choice<ValueType, StateType>::addChoiceReward(ValueType const& value) {
-            choiceRewards.push_back(value);
+        void Choice<ValueType, StateType>::addReward(ValueType const& value) {
+            rewards.push_back(value);
         }
         
         template<typename ValueType, typename StateType>
-        std::vector<ValueType> const& Choice<ValueType, StateType>::getChoiceRewards() const {
-            return choiceRewards;
+        void Choice<ValueType, StateType>::addRewards(std::vector<ValueType>&& values) {
+            this->rewards = std::move(values);
+        }
+        
+        template<typename ValueType, typename StateType>
+        std::vector<ValueType> const& Choice<ValueType, StateType>::getRewards() const {
+            return rewards;
         }
         
         template<typename ValueType, typename StateType>
diff --git a/src/generator/Choice.h b/src/generator/Choice.h
index 76b6c8d16..bd20af0f0 100644
--- a/src/generator/Choice.h
+++ b/src/generator/Choice.h
@@ -66,21 +66,21 @@ namespace storm {
              *
              * @param label The label to associate with this choice.
              */
-            void addChoiceLabel(uint_fast64_t label);
+            void addLabel(uint_fast64_t label);
             
             /*!
              * Adds the given label set to the labels associated with this choice.
              *
              * @param labelSet The label set to associate with this choice.
              */
-            void addChoiceLabels(LabelSet const& labelSet);
+            void addLabels(LabelSet const& labelSet);
             
             /*!
              * Retrieves the set of labels associated with this choice.
              *
              * @return The set of labels associated with this choice.
              */
-            LabelSet const& getChoiceLabels() const;
+            LabelSet const& getLabels() const;
             
             /*!
              * Retrieves the index of the action of this choice.
@@ -104,12 +104,17 @@ namespace storm {
             /*!
              * Adds the given value to the reward associated with this choice.
              */
-            void addChoiceReward(ValueType const& value);
+            void addReward(ValueType const& value);
+            
+            /*!
+             * Adds the given choices rewards to this choice.
+             */
+            void addRewards(std::vector<ValueType>&& values);
             
             /*!
              * Retrieves the rewards for this choice under selected reward models.
              */
-            std::vector<ValueType> const& getChoiceRewards() const;
+            std::vector<ValueType> const& getRewards() const;
             
             /*!
              * Retrieves whether the choice is Markovian.
@@ -135,10 +140,10 @@ namespace storm {
             ValueType totalMass;
             
             // The reward values associated with this choice.
-            std::vector<ValueType> choiceRewards;
+            std::vector<ValueType> rewards;
             
             // The labels that are associated with this choice.
-            boost::optional<LabelSet> choiceLabels;
+            boost::optional<LabelSet> labels;
         };
 
         template<typename ValueType, typename StateType>
diff --git a/src/generator/JaniNextStateGenerator.cpp b/src/generator/JaniNextStateGenerator.cpp
index 5b5f54151..0df974f3b 100644
--- a/src/generator/JaniNextStateGenerator.cpp
+++ b/src/generator/JaniNextStateGenerator.cpp
@@ -22,7 +22,7 @@ namespace storm {
         }
         
         template<typename ValueType, typename StateType>
-        JaniNextStateGenerator<ValueType, StateType>::JaniNextStateGenerator(storm::jani::Model const& model, NextStateGeneratorOptions const& options, bool flag) : NextStateGenerator<ValueType, StateType>(model.getExpressionManager(), VariableInformation(model), options), model(model), rewardVariables() {
+        JaniNextStateGenerator<ValueType, StateType>::JaniNextStateGenerator(storm::jani::Model const& model, NextStateGeneratorOptions const& options, bool flag) : NextStateGenerator<ValueType, StateType>(model.getExpressionManager(), VariableInformation(model), options), model(model), rewardVariables(), hasStateActionRewards(false) {
             STORM_LOG_THROW(model.hasDefaultComposition(), storm::exceptions::WrongFormatException, "The explicit next-state generator currently does not support custom system compositions.");
             STORM_LOG_THROW(!model.hasNonGlobalTransientVariable(), storm::exceptions::InvalidSettingsException, "The explicit next-state generator currently does not support automata-local transient variables.");
             STORM_LOG_THROW(!this->options.isBuildChoiceLabelsSet(), storm::exceptions::InvalidSettingsException, "JANI next-state generator cannot generate choice labels.");
@@ -282,6 +282,10 @@ namespace storm {
             if (this->isDeterministicModel() && totalNumberOfChoices > 1) {
                 Choice<ValueType> globalChoice;
                 
+                // For CTMCs, we need to keep track of the total exit rate to scale the action rewards later. For DTMCs
+                // this is equal to the number of choices, which is why we initialize it like this here.
+                ValueType totalExitRate = this->isDiscreteTimeModel() ? static_cast<ValueType>(totalNumberOfChoices) : storm::utility::zero<ValueType>();
+
                 // Iterate over all choices and combine the probabilities/rates into one choice.
                 for (auto const& choice : allChoices) {
                     for (auto const& stateProbabilityPair : choice) {
@@ -292,11 +296,23 @@ namespace storm {
                         }
                     }
                     
+                    if (hasStateActionRewards && !this->isDiscreteTimeModel()) {
+                        totalExitRate += choice.getTotalMass();
+                    }
+                    
                     if (this->options.isBuildChoiceLabelsSet()) {
-                        globalChoice.addChoiceLabels(choice.getChoiceLabels());
+                        globalChoice.addLabels(choice.getLabels());
                     }
                 }
-                
+             
+                std::vector<ValueType> stateActionRewards(rewardVariables.size(), storm::utility::zero<ValueType>());
+                for (auto const& choice : allChoices) {
+                    for (uint_fast64_t rewardVariableIndex = 0; rewardVariableIndex < rewardVariables.size(); ++rewardVariableIndex) {
+                        stateActionRewards[rewardVariableIndex] += choice.getRewards()[rewardVariableIndex] * choice.getTotalMass() / totalExitRate;
+                    }
+                }
+                globalChoice.addRewards(std::move(stateActionRewards));
+                                
                 // Move the newly fused choice in place.
                 allChoices.clear();
                 allChoices.push_back(std::move(globalChoice));
@@ -349,7 +365,7 @@ namespace storm {
                     }
                     
                     // Create the state-action reward for the newly created choice.
-                    performTransientAssignments(edge.getAssignments().getTransientAssignments(), [&choice] (ValueType const& value) { choice.addChoiceReward(value); } );
+                    performTransientAssignments(edge.getAssignments().getTransientAssignments(), [&choice] (ValueType const& value) { choice.addReward(value); } );
 
                     // Check that the resulting distribution is in fact a distribution.
                     STORM_LOG_THROW(!this->isDiscreteTimeModel() || this->comparator.isOne(probabilitySum), storm::exceptions::WrongFormatException, "Probabilities do not sum to one for edge (actually sum to " << probabilitySum << ").");
@@ -385,6 +401,7 @@ namespace storm {
                     while (!done) {
                         boost::container::flat_map<CompressedState, ValueType>* currentTargetStates = new boost::container::flat_map<CompressedState, ValueType>();
                         boost::container::flat_map<CompressedState, ValueType>* newTargetStates = new boost::container::flat_map<CompressedState, ValueType>();
+                        std::vector<ValueType> stateActionRewards(rewardVariables.size(), storm::utility::zero<ValueType>());
                         
                         currentTargetStates->emplace(state, storm::utility::one<ValueType>());
                         
@@ -405,6 +422,10 @@ namespace storm {
                                         newTargetStates->emplace(newTargetState, stateProbabilityPair.second * this->evaluator.asRational(destination.getProbability()));
                                     }
                                 }
+                                
+                                // Create the state-action reward for the newly created choice.
+                                auto valueIt = stateActionRewards.begin();
+                                performTransientAssignments(edge.getAssignments().getTransientAssignments(), [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } );
                             }
                             
                             // If there is one more command to come, shift the target states one time step back.
@@ -423,6 +444,9 @@ namespace storm {
                         // Now create the actual distribution.
                         Choice<ValueType>& choice = result.back();
                         
+                        // Add the rewards to the choice.
+                        choice.addRewards(std::move(stateActionRewards));
+
                         // Add the probabilities/rates to the newly created choice.
                         ValueType probabilitySum = storm::utility::zero<ValueType>();
                         for (auto const& stateProbabilityPair : *newTargetStates) {
@@ -601,6 +625,7 @@ namespace storm {
                         }
                         if (*rewardVariableIt == assignment.getExpressionVariable()) {
                             rewardModelInformation[std::distance(rewardVariables.begin(), rewardVariableIt)].setHasStateActionRewards();
+                            hasStateActionRewards = true;
                             ++rewardVariableIt;
                         }
                     }
diff --git a/src/generator/JaniNextStateGenerator.h b/src/generator/JaniNextStateGenerator.h
index d9932f60a..d0b5707a6 100644
--- a/src/generator/JaniNextStateGenerator.h
+++ b/src/generator/JaniNextStateGenerator.h
@@ -108,6 +108,9 @@ namespace storm {
             
             /// A vector storing information about the corresponding reward models (variables).
             std::vector<RewardModelInformation> rewardModelInformation;
+            
+            // A flag that stores whether at least one of the selected reward models has state-action rewards.
+            bool hasStateActionRewards;
         };
         
     }
diff --git a/src/generator/PrismNextStateGenerator.cpp b/src/generator/PrismNextStateGenerator.cpp
index 1ecbec2ab..6c24cd00b 100644
--- a/src/generator/PrismNextStateGenerator.cpp
+++ b/src/generator/PrismNextStateGenerator.cpp
@@ -242,7 +242,7 @@ namespace storm {
                     }
                     
                     if (this->options.isBuildChoiceLabelsSet()) {
-                        globalChoice.addChoiceLabels(choice.getChoiceLabels());
+                        globalChoice.addLabels(choice.getLabels());
                     }
                 }
                 
@@ -259,7 +259,7 @@ namespace storm {
                             
                         }
                     }
-                    globalChoice.addChoiceReward(stateActionRewardValue);
+                    globalChoice.addReward(stateActionRewardValue);
                 }
                 
                 // Move the newly fused choice in place.
@@ -382,7 +382,7 @@ namespace storm {
                     
                     // Remember the command labels only if we were asked to.
                     if (this->options.isBuildChoiceLabelsSet()) {
-                        choice.addChoiceLabel(command.getGlobalIndex());
+                        choice.addLabel(command.getGlobalIndex());
                     }
                     
                     // Iterate over all updates of the current command.
@@ -410,7 +410,7 @@ namespace storm {
                                 }
                             }
                         }
-                        choice.addChoiceReward(stateActionRewardValue);
+                        choice.addReward(stateActionRewardValue);
                     }
                     
                     // Check that the resulting distribution is in fact a distribution.
@@ -486,7 +486,7 @@ namespace storm {
                         if (this->options.isBuildChoiceLabelsSet()) {
                             // Add the labels of all commands to this choice.
                             for (uint_fast64_t i = 0; i < iteratorList.size(); ++i) {
-                                choice.addChoiceLabel(iteratorList[i]->get().getGlobalIndex());
+                                choice.addLabel(iteratorList[i]->get().getGlobalIndex());
                             }
                         }
                         
@@ -511,7 +511,7 @@ namespace storm {
                                     }
                                 }
                             }
-                            choice.addChoiceReward(stateActionRewardValue);
+                            choice.addReward(stateActionRewardValue);
                         }
                         
                         // Dispose of the temporary maps.