added proper action reward handling to JANI next-state generator

Former-commit-id: cd554d6e12 [formerly 47dfb5a796] Former-commit-id: 67a31637c5
8 years ago · ce5ca9d1ce
6 changed files with 72 additions and 34 deletions
--- a/src/builder/ExplicitModelBuilder.cpp
+++ b/src/builder/ExplicitModelBuilder.cpp
@ -274,7 +274,7 @@ namespace storm {
                    for (auto const& choice : behavior) {
                        // Add command labels if requested.
                        if (generator->getOptions().isBuildChoiceLabelsSet()) {
-                            choiceLabels.get().push_back(choice.getChoiceLabels());
+                            choiceLabels.get().push_back(choice.getLabels());
                        }
                        
                        // If we keep track of the Markovian choices, store whether the current one is Markovian.
@ -289,7 +289,7 @@ namespace storm {
                        }
                        
                        // Add the rewards to the reward models.
-                        auto choiceRewardIt = choice.getChoiceRewards().begin();
+                        auto choiceRewardIt = choice.getRewards().begin();
                        for (auto& rewardModelBuilder : rewardModelBuilders) {
                            if (rewardModelBuilder.hasStateActionRewards()) {
                                rewardModelBuilder.addStateActionReward(*choiceRewardIt);
--- a/src/generator/Choice.cpp
+++ b/src/generator/Choice.cpp
@ -8,7 +8,7 @@ namespace storm {
    namespace generator {
        
        template<typename ValueType, typename StateType>
-        Choice<ValueType, StateType>::Choice(uint_fast64_t actionIndex, bool markovian) : markovian(markovian), actionIndex(actionIndex), distribution(), totalMass(storm::utility::zero<ValueType>()), choiceRewards() {
+        Choice<ValueType, StateType>::Choice(uint_fast64_t actionIndex, bool markovian) : markovian(markovian), actionIndex(actionIndex), distribution(), totalMass(storm::utility::zero<ValueType>()), rewards(), labels() {
            // Intentionally left empty.
        }
        
@ -33,24 +33,24 @@ namespace storm {
        }
        
        template<typename ValueType, typename StateType>
-        void Choice<ValueType, StateType>::addChoiceLabel(uint_fast64_t label) {
-            if (!choiceLabels) {
-                choiceLabels = LabelSet();
+        void Choice<ValueType, StateType>::addLabel(uint_fast64_t label) {
+            if (!labels) {
+                labels = LabelSet();
            }
-            choiceLabels->insert(label);
+            labels->insert(label);
        }
        
        template<typename ValueType, typename StateType>
-        void Choice<ValueType, StateType>::addChoiceLabels(LabelSet const& labelSet) {
-            if (!choiceLabels) {
-                choiceLabels = LabelSet();
+        void Choice<ValueType, StateType>::addLabels(LabelSet const& labelSet) {
+            if (!labels) {
+                labels = LabelSet();
            }
-            choiceLabels->insert(labelSet.begin(), labelSet.end());
+            labels->insert(labelSet.begin(), labelSet.end());
        }
        
        template<typename ValueType, typename StateType>
-        boost::container::flat_set<uint_fast64_t> const& Choice<ValueType, StateType>::getChoiceLabels() const {
-            return *choiceLabels;
+        boost::container::flat_set<uint_fast64_t> const& Choice<ValueType, StateType>::getLabels() const {
+            return *labels;
        }
        
        template<typename ValueType, typename StateType>
@ -70,13 +70,18 @@ namespace storm {
        }
        
        template<typename ValueType, typename StateType>
-        void Choice<ValueType, StateType>::addChoiceReward(ValueType const& value) {
-            choiceRewards.push_back(value);
+        void Choice<ValueType, StateType>::addReward(ValueType const& value) {
+            rewards.push_back(value);
        }
        
        template<typename ValueType, typename StateType>
-        std::vector<ValueType> const& Choice<ValueType, StateType>::getChoiceRewards() const {
-            return choiceRewards;
+        void Choice<ValueType, StateType>::addRewards(std::vector<ValueType>&& values) {
+            this->rewards = std::move(values);
+        }
+        
+        template<typename ValueType, typename StateType>
+        std::vector<ValueType> const& Choice<ValueType, StateType>::getRewards() const {
+            return rewards;
        }
        
        template<typename ValueType, typename StateType>
--- a/src/generator/Choice.h
+++ b/src/generator/Choice.h
@ -66,21 +66,21 @@ namespace storm {
             *
             * @param label The label to associate with this choice.
             */
-            void addChoiceLabel(uint_fast64_t label);
+            void addLabel(uint_fast64_t label);
            
            /*!
             * Adds the given label set to the labels associated with this choice.
             *
             * @param labelSet The label set to associate with this choice.
             */
-            void addChoiceLabels(LabelSet const& labelSet);
+            void addLabels(LabelSet const& labelSet);
            
            /*!
             * Retrieves the set of labels associated with this choice.
             *
             * @return The set of labels associated with this choice.
             */
-            LabelSet const& getChoiceLabels() const;
+            LabelSet const& getLabels() const;
            
            /*!
             * Retrieves the index of the action of this choice.
@ -104,12 +104,17 @@ namespace storm {
            /*!
             * Adds the given value to the reward associated with this choice.
             */
-            void addChoiceReward(ValueType const& value);
+            void addReward(ValueType const& value);
+            
+            /*!
+             * Sets the rewards of this choice to the given values, replacing any rewards stored before.
+             */
+            void addRewards(std::vector<ValueType>&& values);
            
            /*!
             * Retrieves the rewards for this choice under selected reward models.
             */
-            std::vector<ValueType> const& getChoiceRewards() const;
+            std::vector<ValueType> const& getRewards() const;
            
            /*!
             * Retrieves whether the choice is Markovian.
@ -135,10 +140,10 @@ namespace storm {
            ValueType totalMass;
            
            // The reward values associated with this choice.
-            std::vector<ValueType> choiceRewards;
+            std::vector<ValueType> rewards;
            
            // The labels that are associated with this choice.
-            boost::optional<LabelSet> choiceLabels;
+            boost::optional<LabelSet> labels;
        };

        template<typename ValueType, typename StateType>
--- a/src/generator/JaniNextStateGenerator.cpp
+++ b/src/generator/JaniNextStateGenerator.cpp
@ -22,7 +22,7 @@ namespace storm {
        }
        
        template<typename ValueType, typename StateType>
-        JaniNextStateGenerator<ValueType, StateType>::JaniNextStateGenerator(storm::jani::Model const& model, NextStateGeneratorOptions const& options, bool flag) : NextStateGenerator<ValueType, StateType>(model.getExpressionManager(), VariableInformation(model), options), model(model), rewardVariables() {
+        JaniNextStateGenerator<ValueType, StateType>::JaniNextStateGenerator(storm::jani::Model const& model, NextStateGeneratorOptions const& options, bool flag) : NextStateGenerator<ValueType, StateType>(model.getExpressionManager(), VariableInformation(model), options), model(model), rewardVariables(), hasStateActionRewards(false) {
            STORM_LOG_THROW(model.hasDefaultComposition(), storm::exceptions::WrongFormatException, "The explicit next-state generator currently does not support custom system compositions.");
            STORM_LOG_THROW(!model.hasNonGlobalTransientVariable(), storm::exceptions::InvalidSettingsException, "The explicit next-state generator currently does not support automata-local transient variables.");
            STORM_LOG_THROW(!this->options.isBuildChoiceLabelsSet(), storm::exceptions::InvalidSettingsException, "JANI next-state generator cannot generate choice labels.");
@ -282,6 +282,10 @@ namespace storm {
            if (this->isDeterministicModel() && totalNumberOfChoices > 1) {
                Choice<ValueType> globalChoice;
                
+                // For CTMCs, we need to keep track of the total exit rate to scale the action rewards later. For DTMCs
+                // this is equal to the number of choices, which is why we initialize it like this here.
+                ValueType totalExitRate = this->isDiscreteTimeModel() ? static_cast<ValueType>(totalNumberOfChoices) : storm::utility::zero<ValueType>();
+
                // Iterate over all choices and combine the probabilities/rates into one choice.
                for (auto const& choice : allChoices) {
                    for (auto const& stateProbabilityPair : choice) {
@ -292,11 +296,23 @@ namespace storm {
                        }
                    }
                    
+                    if (hasStateActionRewards && !this->isDiscreteTimeModel()) {
+                        totalExitRate += choice.getTotalMass();
+                    }
+                    
                    if (this->options.isBuildChoiceLabelsSet()) {
-                        globalChoice.addChoiceLabels(choice.getChoiceLabels());
+                        globalChoice.addLabels(choice.getLabels());
                    }
                }
-                
+             
+                std::vector<ValueType> stateActionRewards(rewardVariables.size(), storm::utility::zero<ValueType>());
+                for (auto const& choice : allChoices) {
+                    for (uint_fast64_t rewardVariableIndex = 0; rewardVariableIndex < rewardVariables.size(); ++rewardVariableIndex) {
+                        stateActionRewards[rewardVariableIndex] += choice.getRewards()[rewardVariableIndex] * choice.getTotalMass() / totalExitRate;
+                    }
+                }
+                globalChoice.addRewards(std::move(stateActionRewards));
+                                
                // Move the newly fused choice in place.
                allChoices.clear();
                allChoices.push_back(std::move(globalChoice));
@ -349,7 +365,7 @@ namespace storm {
                    }
                    
                    // Create the state-action reward for the newly created choice.
-                    performTransientAssignments(edge.getAssignments().getTransientAssignments(), [&choice] (ValueType const& value) { choice.addChoiceReward(value); } );
+                    performTransientAssignments(edge.getAssignments().getTransientAssignments(), [&choice] (ValueType const& value) { choice.addReward(value); } );

                    // Check that the resulting distribution is in fact a distribution.
                    STORM_LOG_THROW(!this->isDiscreteTimeModel() || this->comparator.isOne(probabilitySum), storm::exceptions::WrongFormatException, "Probabilities do not sum to one for edge (actually sum to " << probabilitySum << ").");
@ -385,6 +401,7 @@ namespace storm {
                    while (!done) {
                        boost::container::flat_map<CompressedState, ValueType>* currentTargetStates = new boost::container::flat_map<CompressedState, ValueType>();
                        boost::container::flat_map<CompressedState, ValueType>* newTargetStates = new boost::container::flat_map<CompressedState, ValueType>();
+                        std::vector<ValueType> stateActionRewards(rewardVariables.size(), storm::utility::zero<ValueType>());
                        
                        currentTargetStates->emplace(state, storm::utility::one<ValueType>());
                        
@ -405,6 +422,10 @@ namespace storm {
                                        newTargetStates->emplace(newTargetState, stateProbabilityPair.second * this->evaluator.asRational(destination.getProbability()));
                                    }
                                }
+                                
+                                // Accumulate the values of this edge's transient assignments into the state-action rewards.
+                                auto valueIt = stateActionRewards.begin();
+                                performTransientAssignments(edge.getAssignments().getTransientAssignments(), [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } );
                            }
                            
                            // If there is one more command to come, shift the target states one time step back.
@ -423,6 +444,9 @@ namespace storm {
                        // Now create the actual distribution.
                        Choice<ValueType>& choice = result.back();
                        
+                        // Add the rewards to the choice.
+                        choice.addRewards(std::move(stateActionRewards));
+
                        // Add the probabilities/rates to the newly created choice.
                        ValueType probabilitySum = storm::utility::zero<ValueType>();
                        for (auto const& stateProbabilityPair : *newTargetStates) {
@ -601,6 +625,7 @@ namespace storm {
                        }
                        if (*rewardVariableIt == assignment.getExpressionVariable()) {
                            rewardModelInformation[std::distance(rewardVariables.begin(), rewardVariableIt)].setHasStateActionRewards();
+                            hasStateActionRewards = true;
                            ++rewardVariableIt;
                        }
                    }
--- a/src/generator/JaniNextStateGenerator.h
+++ b/src/generator/JaniNextStateGenerator.h
@ -108,6 +108,9 @@ namespace storm {
            
            /// A vector storing information about the corresponding reward models (variables).
            std::vector<RewardModelInformation> rewardModelInformation;
+            
+            /// A flag that stores whether at least one of the selected reward models has state-action rewards.
+            bool hasStateActionRewards;
        };
        
    }
--- a/src/generator/PrismNextStateGenerator.cpp
+++ b/src/generator/PrismNextStateGenerator.cpp
@ -242,7 +242,7 @@ namespace storm {
                    }
                    
                    if (this->options.isBuildChoiceLabelsSet()) {
-                        globalChoice.addChoiceLabels(choice.getChoiceLabels());
+                        globalChoice.addLabels(choice.getLabels());
                    }
                }
                
@ -259,7 +259,7 @@ namespace storm {
                            
                        }
                    }
-                    globalChoice.addChoiceReward(stateActionRewardValue);
+                    globalChoice.addReward(stateActionRewardValue);
                }
                
                // Move the newly fused choice in place.
@ -382,7 +382,7 @@ namespace storm {
                    
                    // Remember the command labels only if we were asked to.
                    if (this->options.isBuildChoiceLabelsSet()) {
-                        choice.addChoiceLabel(command.getGlobalIndex());
+                        choice.addLabel(command.getGlobalIndex());
                    }
                    
                    // Iterate over all updates of the current command.
@ -410,7 +410,7 @@ namespace storm {
                                }
                            }
                        }
-                        choice.addChoiceReward(stateActionRewardValue);
+                        choice.addReward(stateActionRewardValue);
                    }
                    
                    // Check that the resulting distribution is in fact a distribution.
@ -486,7 +486,7 @@ namespace storm {
                        if (this->options.isBuildChoiceLabelsSet()) {
                            // Add the labels of all commands to this choice.
                            for (uint_fast64_t i = 0; i < iteratorList.size(); ++i) {
-                                choice.addChoiceLabel(iteratorList[i]->get().getGlobalIndex());
+                                choice.addLabel(iteratorList[i]->get().getGlobalIndex());
                            }
                        }
                        
@ -511,7 +511,7 @@ namespace storm {
                                    }
                                }
                            }
-                            choice.addChoiceReward(stateActionRewardValue);
+                            choice.addReward(stateActionRewardValue);
                        }
                        
                        // Dispose of the temporary maps.