From ce5ca9d1ced4ecfdc5b77ce00ecc05c914c81164 Mon Sep 17 00:00:00 2001 From: dehnert Date: Tue, 13 Sep 2016 11:44:08 +0200 Subject: [PATCH] added proper action reward handling to JANI next-state generator Former-commit-id: cd554d6e127faf12c368dea1e5e2a2999bba5f51 [formerly 47dfb5a7967fb88c345851604e8a8974f0b1e09e] Former-commit-id: 67a31637c5b2d1c36afec11b017bd46a2794b38c --- src/builder/ExplicitModelBuilder.cpp | 4 +-- src/generator/Choice.cpp | 35 +++++++++++++---------- src/generator/Choice.h | 19 +++++++----- src/generator/JaniNextStateGenerator.cpp | 33 ++++++++++++++++++--- src/generator/JaniNextStateGenerator.h | 3 ++ src/generator/PrismNextStateGenerator.cpp | 12 ++++---- 6 files changed, 72 insertions(+), 34 deletions(-) diff --git a/src/builder/ExplicitModelBuilder.cpp b/src/builder/ExplicitModelBuilder.cpp index 5faf386e5..8d2164f9f 100644 --- a/src/builder/ExplicitModelBuilder.cpp +++ b/src/builder/ExplicitModelBuilder.cpp @@ -274,7 +274,7 @@ namespace storm { for (auto const& choice : behavior) { // Add command labels if requested. if (generator->getOptions().isBuildChoiceLabelsSet()) { - choiceLabels.get().push_back(choice.getChoiceLabels()); + choiceLabels.get().push_back(choice.getLabels()); } // If we keep track of the Markovian choices, store whether the current one is Markovian. @@ -289,7 +289,7 @@ namespace storm { } // Add the rewards to the reward models. - auto choiceRewardIt = choice.getChoiceRewards().begin(); + auto choiceRewardIt = choice.getRewards().begin(); for (auto& rewardModelBuilder : rewardModelBuilders) { if (rewardModelBuilder.hasStateActionRewards()) { rewardModelBuilder.addStateActionReward(*choiceRewardIt); diff --git a/src/generator/Choice.cpp b/src/generator/Choice.cpp index 7f91a4b4d..4337fffd4 100644 --- a/src/generator/Choice.cpp +++ b/src/generator/Choice.cpp @@ -8,7 +8,7 @@ namespace storm { namespace generator { template - Choice::Choice(uint_fast64_t actionIndex, bool markovian) : markovian(markovian), actionIndex(actionIndex), distribution(), totalMass(storm::utility::zero()), choiceRewards() { + Choice::Choice(uint_fast64_t actionIndex, bool markovian) : markovian(markovian), actionIndex(actionIndex), distribution(), totalMass(storm::utility::zero()), rewards(), labels() { // Intentionally left empty. } @@ -33,24 +33,24 @@ namespace storm { } template - void Choice::addChoiceLabel(uint_fast64_t label) { - if (!choiceLabels) { - choiceLabels = LabelSet(); + void Choice::addLabel(uint_fast64_t label) { + if (!labels) { + labels = LabelSet(); } - choiceLabels->insert(label); + labels->insert(label); } template - void Choice::addChoiceLabels(LabelSet const& labelSet) { - if (!choiceLabels) { - choiceLabels = LabelSet(); + void Choice::addLabels(LabelSet const& labelSet) { + if (!labels) { + labels = LabelSet(); } - choiceLabels->insert(labelSet.begin(), labelSet.end()); + labels->insert(labelSet.begin(), labelSet.end()); } template - boost::container::flat_set const& Choice::getChoiceLabels() const { - return *choiceLabels; + boost::container::flat_set const& Choice::getLabels() const { + return *labels; } template @@ -70,13 +70,18 @@ namespace storm { } template - void Choice::addChoiceReward(ValueType const& value) { - choiceRewards.push_back(value); + void Choice::addReward(ValueType const& value) { + rewards.push_back(value); } template - std::vector const& Choice::getChoiceRewards() const { - return choiceRewards; + void Choice::addRewards(std::vector&& values) { + this->rewards = std::move(values); + } + + template + std::vector const& Choice::getRewards() const { + return rewards; } template diff --git a/src/generator/Choice.h b/src/generator/Choice.h index 76b6c8d16..bd20af0f0 100644 --- a/src/generator/Choice.h +++ b/src/generator/Choice.h @@ -66,21 +66,21 @@ namespace storm { * * @param label The label to associate with this choice. */ - void addChoiceLabel(uint_fast64_t label); + void addLabel(uint_fast64_t label); /*! * Adds the given label set to the labels associated with this choice. * * @param labelSet The label set to associate with this choice. */ - void addChoiceLabels(LabelSet const& labelSet); + void addLabels(LabelSet const& labelSet); /*! * Retrieves the set of labels associated with this choice. * * @return The set of labels associated with this choice. */ - LabelSet const& getChoiceLabels() const; + LabelSet const& getLabels() const; /*! * Retrieves the index of the action of this choice. @@ -104,12 +104,17 @@ namespace storm { /*! * Adds the given value to the reward associated with this choice. */ - void addChoiceReward(ValueType const& value); + void addReward(ValueType const& value); + + /*! + * Adds the given choices rewards to this choice. + */ + void addRewards(std::vector&& values); /*! * Retrieves the rewards for this choice under selected reward models. */ - std::vector const& getChoiceRewards() const; + std::vector const& getRewards() const; /*! * Retrieves whether the choice is Markovian. @@ -135,10 +140,10 @@ namespace storm { ValueType totalMass; // The reward values associated with this choice. - std::vector choiceRewards; + std::vector rewards; // The labels that are associated with this choice. - boost::optional choiceLabels; + boost::optional labels; }; template diff --git a/src/generator/JaniNextStateGenerator.cpp b/src/generator/JaniNextStateGenerator.cpp index 5b5f54151..0df974f3b 100644 --- a/src/generator/JaniNextStateGenerator.cpp +++ b/src/generator/JaniNextStateGenerator.cpp @@ -22,7 +22,7 @@ namespace storm { } template - JaniNextStateGenerator::JaniNextStateGenerator(storm::jani::Model const& model, NextStateGeneratorOptions const& options, bool flag) : NextStateGenerator(model.getExpressionManager(), VariableInformation(model), options), model(model), rewardVariables() { + JaniNextStateGenerator::JaniNextStateGenerator(storm::jani::Model const& model, NextStateGeneratorOptions const& options, bool flag) : NextStateGenerator(model.getExpressionManager(), VariableInformation(model), options), model(model), rewardVariables(), hasStateActionRewards(false) { STORM_LOG_THROW(model.hasDefaultComposition(), storm::exceptions::WrongFormatException, "The explicit next-state generator currently does not support custom system compositions."); STORM_LOG_THROW(!model.hasNonGlobalTransientVariable(), storm::exceptions::InvalidSettingsException, "The explicit next-state generator currently does not support automata-local transient variables."); STORM_LOG_THROW(!this->options.isBuildChoiceLabelsSet(), storm::exceptions::InvalidSettingsException, "JANI next-state generator cannot generate choice labels."); @@ -282,6 +282,10 @@ namespace storm { if (this->isDeterministicModel() && totalNumberOfChoices > 1) { Choice globalChoice; + // For CTMCs, we need to keep track of the total exit rate to scale the action rewards later. For DTMCs + // this is equal to the number of choices, which is why we initialize it like this here. + ValueType totalExitRate = this->isDiscreteTimeModel() ? static_cast(totalNumberOfChoices) : storm::utility::zero(); + // Iterate over all choices and combine the probabilities/rates into one choice. for (auto const& choice : allChoices) { for (auto const& stateProbabilityPair : choice) { @@ -292,11 +296,23 @@ namespace storm { } } + if (hasStateActionRewards && !this->isDiscreteTimeModel()) { + totalExitRate += choice.getTotalMass(); + } + if (this->options.isBuildChoiceLabelsSet()) { - globalChoice.addChoiceLabels(choice.getChoiceLabels()); + globalChoice.addLabels(choice.getLabels()); } } - + + std::vector stateActionRewards(rewardVariables.size(), storm::utility::zero()); + for (auto const& choice : allChoices) { + for (uint_fast64_t rewardVariableIndex = 0; rewardVariableIndex < rewardVariables.size(); ++rewardVariableIndex) { + stateActionRewards[rewardVariableIndex] += choice.getRewards()[rewardVariableIndex] * choice.getTotalMass() / totalExitRate; + } + } + globalChoice.addRewards(std::move(stateActionRewards)); + // Move the newly fused choice in place. allChoices.clear(); allChoices.push_back(std::move(globalChoice)); @@ -349,7 +365,7 @@ namespace storm { } // Create the state-action reward for the newly created choice. - performTransientAssignments(edge.getAssignments().getTransientAssignments(), [&choice] (ValueType const& value) { choice.addChoiceReward(value); } ); + performTransientAssignments(edge.getAssignments().getTransientAssignments(), [&choice] (ValueType const& value) { choice.addReward(value); } ); // Check that the resulting distribution is in fact a distribution. STORM_LOG_THROW(!this->isDiscreteTimeModel() || this->comparator.isOne(probabilitySum), storm::exceptions::WrongFormatException, "Probabilities do not sum to one for edge (actually sum to " << probabilitySum << ")."); @@ -385,6 +401,7 @@ namespace storm { while (!done) { boost::container::flat_map* currentTargetStates = new boost::container::flat_map(); boost::container::flat_map* newTargetStates = new boost::container::flat_map(); + std::vector stateActionRewards(rewardVariables.size(), storm::utility::zero()); currentTargetStates->emplace(state, storm::utility::one()); @@ -405,6 +422,10 @@ namespace storm { newTargetStates->emplace(newTargetState, stateProbabilityPair.second * this->evaluator.asRational(destination.getProbability())); } } + + // Create the state-action reward for the newly created choice. + auto valueIt = stateActionRewards.begin(); + performTransientAssignments(edge.getAssignments().getTransientAssignments(), [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } ); } // If there is one more command to come, shift the target states one time step back. @@ -423,6 +444,9 @@ namespace storm { // Now create the actual distribution. Choice& choice = result.back(); + // Add the rewards to the choice. + choice.addRewards(std::move(stateActionRewards)); + // Add the probabilities/rates to the newly created choice. ValueType probabilitySum = storm::utility::zero(); for (auto const& stateProbabilityPair : *newTargetStates) { @@ -601,6 +625,7 @@ namespace storm { } if (*rewardVariableIt == assignment.getExpressionVariable()) { rewardModelInformation[std::distance(rewardVariables.begin(), rewardVariableIt)].setHasStateActionRewards(); + hasStateActionRewards = true; ++rewardVariableIt; } } diff --git a/src/generator/JaniNextStateGenerator.h b/src/generator/JaniNextStateGenerator.h index d9932f60a..d0b5707a6 100644 --- a/src/generator/JaniNextStateGenerator.h +++ b/src/generator/JaniNextStateGenerator.h @@ -108,6 +108,9 @@ namespace storm { /// A vector storing information about the corresponding reward models (variables). std::vector rewardModelInformation; + + // A flag that stores whether at least one of the selected reward models has state-action rewards. + bool hasStateActionRewards; }; } diff --git a/src/generator/PrismNextStateGenerator.cpp b/src/generator/PrismNextStateGenerator.cpp index 1ecbec2ab..6c24cd00b 100644 --- a/src/generator/PrismNextStateGenerator.cpp +++ b/src/generator/PrismNextStateGenerator.cpp @@ -242,7 +242,7 @@ namespace storm { } if (this->options.isBuildChoiceLabelsSet()) { - globalChoice.addChoiceLabels(choice.getChoiceLabels()); + globalChoice.addLabels(choice.getLabels()); } } @@ -259,7 +259,7 @@ namespace storm { } } - globalChoice.addChoiceReward(stateActionRewardValue); + globalChoice.addReward(stateActionRewardValue); } // Move the newly fused choice in place. @@ -382,7 +382,7 @@ namespace storm { // Remember the command labels only if we were asked to. if (this->options.isBuildChoiceLabelsSet()) { - choice.addChoiceLabel(command.getGlobalIndex()); + choice.addLabel(command.getGlobalIndex()); } // Iterate over all updates of the current command. @@ -410,7 +410,7 @@ namespace storm { } } } - choice.addChoiceReward(stateActionRewardValue); + choice.addReward(stateActionRewardValue); } // Check that the resulting distribution is in fact a distribution. @@ -486,7 +486,7 @@ namespace storm { if (this->options.isBuildChoiceLabelsSet()) { // Add the labels of all commands to this choice. for (uint_fast64_t i = 0; i < iteratorList.size(); ++i) { - choice.addChoiceLabel(iteratorList[i]->get().getGlobalIndex()); + choice.addLabel(iteratorList[i]->get().getGlobalIndex()); } } @@ -511,7 +511,7 @@ namespace storm { } } } - choice.addChoiceReward(stateActionRewardValue); + choice.addReward(stateActionRewardValue); } // Dispose of the temporary maps.