From 4cda27f09473db69c2879b6adbf8914fc7fc22ac Mon Sep 17 00:00:00 2001 From: TimQu Date: Tue, 25 Sep 2018 15:20:52 +0200 Subject: [PATCH] jani next state generator now sets values of transient variables in the evaluator to evaluate reward expressions --- .../generator/JaniNextStateGenerator.cpp | 175 ++++++++++++------ src/storm/generator/JaniNextStateGenerator.h | 12 +- .../generator/TransientVariableInformation.h | 4 + 3 files changed, 136 insertions(+), 55 deletions(-) diff --git a/src/storm/generator/JaniNextStateGenerator.cpp b/src/storm/generator/JaniNextStateGenerator.cpp index 7109b7b1d..78e59aed8 100644 --- a/src/storm/generator/JaniNextStateGenerator.cpp +++ b/src/storm/generator/JaniNextStateGenerator.cpp @@ -80,6 +80,7 @@ namespace storm { // Create a proper evalator. this->evaluator = std::make_unique>(model.getManager()); + this->transientVariableInformation.setDefaultValuesInEvaluator(*this->evaluator); if (this->options.isBuildAllRewardModelsSet()) { for (auto const& variable : model.getGlobalVariables()) { @@ -383,12 +384,10 @@ namespace storm { } template - void JaniNextStateGenerator::applyTransientUpdate(TransientVariableValuation& transientValuation, storm::jani::EdgeDestination const& destination, int64_t assignmentLevel, storm::expressions::ExpressionEvaluator const& expressionEvaluator) { + void JaniNextStateGenerator::applyTransientUpdate(TransientVariableValuation& transientValuation, storm::jani::detail::ConstAssignments const& transientAssignments, storm::expressions::ExpressionEvaluator const& expressionEvaluator) { - // Perform the assignments. - auto const& assignments = destination.getOrderedAssignments().getTransientAssignments(assignmentLevel); - auto assignmentIt = assignments.begin(); - auto assignmentIte = assignments.end(); + auto assignmentIt = transientAssignments.begin(); + auto assignmentIte = transientAssignments.end(); // Iterate over all boolean assignments and carry them out. auto boolIt = this->transientVariableInformation.booleanVariableInformation.begin(); @@ -459,15 +458,22 @@ namespace storm { // need the state rewards then. std::vector stateRewards(this->rewardVariables.size(), storm::utility::zero()); uint64_t automatonIndex = 0; + TransientVariableValuation transientVariableValuation; for (auto const& automatonRef : this->parallelAutomata) { auto const& automaton = automatonRef.get(); uint64_t currentLocationIndex = locations[automatonIndex]; storm::jani::Location const& location = automaton.getLocation(currentLocationIndex); + STORM_LOG_ASSERT(!location.getAssignments().hasMultipleLevels(true), "Indexed assignments at locations are not supported in the jani standard."); + applyTransientUpdate(transientVariableValuation, location.getAssignments().getTransientAssignments(), *this->evaluator); auto valueIt = stateRewards.begin(); performTransientAssignments(location.getAssignments().getTransientAssignments(), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } ); ++automatonIndex; } - result.addStateRewards(std::move(stateRewards)); + transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet()); + //result.addStateRewards(std::move(stateRewards)); + result.addStateRewards(evaluateRewardExpressions()); + this->transientVariableInformation.setDefaultValuesInEvaluator(*this->evaluator); + // If a terminal expression was set and we must not expand this state, return now. if (!this->terminalStates.empty()) { @@ -552,8 +558,24 @@ namespace storm { } Choice choice(edge.getActionIndex(), static_cast(exitRate)); + + // Perform the transient edge assignments and create the state action rewards TransientVariableValuation transientVariableValuation; - std::vector stateActionRewards(rewardVariables.size(), storm::utility::zero()); + if (!edge.getAssignments().empty()) { + for (int64_t assignmentLevel = edge.getAssignments().getLowestLevel(true); assignmentLevel <= edge.getAssignments().getHighestLevel(true); ++assignmentLevel) { + transientVariableValuation.clear(); + applyTransientUpdate(transientVariableValuation, edge.getAssignments().getTransientAssignments(assignmentLevel), *this->evaluator); + transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet()); + } + } + std::vector stateActionRewards = evaluateRewardExpressions(); + if (!edge.getAssignments().empty()) { + transientVariableInformation.setDefaultValuesInEvaluator(*this->evaluator); + } + + + auto valueIt = stateActionRewards.begin(); + // performTransientAssignments(edge.getAssignments().getTransientAssignments(), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } ); // Iterate over all updates of the current command. ValueType probabilitySum = storm::utility::zero(); @@ -561,6 +583,7 @@ namespace storm { ValueType probability = this->evaluator->asRational(destination.getProbability()); if (probability != storm::utility::zero()) { + bool evaluatorChanged = false; // Obtain target state index and add it to the list of known states. If it has not yet been // seen, we also add it to the set of states that have yet to be explored. int64_t assignmentLevel = edge.getLowestAssignmentLevel(); // Might be the largest possible integer, if there is no assignment @@ -568,28 +591,37 @@ namespace storm { bool hasTransientAssignments = destination.hasTransientAssignment(); CompressedState newState = applyUpdate(state, destination, this->variableInformation.locationVariables[automatonIndex], assignmentLevel, *this->evaluator); if (hasTransientAssignments) { - transientVariableValuation.clear(); STORM_LOG_ASSERT(this->options.isScaleAndLiftTransitionRewardsSet(), "Transition rewards are not supported and scaling to action rewards is disabled."); - applyTransientUpdate(transientVariableValuation, destination, assignmentLevel, *this->evaluator); + transientVariableValuation.clear(); + applyTransientUpdate(transientVariableValuation, destination.getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator); + transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet()); + evaluatorChanged = true; // Create the rewards for this destination auto valueIt = stateActionRewards.begin(); - performTransientAssignments(destination.getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator, [&] (ValueType const& value) { *valueIt += (value * probability); ++valueIt; } ); + // performTransientAssignments(destination.getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator, [&] (ValueType const& value) { *valueIt += (value * probability); ++valueIt; } ); } if (assignmentLevel < highestLevel) { while (assignmentLevel < highestLevel) { ++assignmentLevel; unpackStateIntoEvaluator(newState, this->variableInformation, *this->evaluator); - transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet()); - transientVariableValuation.clear(); + evaluatorChanged = true; newState = applyUpdate(newState, destination, this->variableInformation.locationVariables[automatonIndex], assignmentLevel, *this->evaluator); if (hasTransientAssignments) { - applyTransientUpdate(transientVariableValuation, destination, assignmentLevel, *this->evaluator); + transientVariableValuation.clear(); + applyTransientUpdate(transientVariableValuation, destination.getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator); + transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet()); + evaluatorChanged = true; // update the rewards for this destination auto valueIt = stateActionRewards.begin(); - performTransientAssignments(destination.getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator, [&] (ValueType const& value) { *valueIt += (value * probability); ++valueIt; } ); + // performTransientAssignments(destination.getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator, [&] (ValueType const& value) { *valueIt += (value * probability); ++valueIt; } ); } } - // Restore the old information + } + + addEvaluatedRewardExpressions(stateActionRewards, probability); + + if (evaluatorChanged) { + // Restore the old variable valuation unpackStateIntoEvaluator(state, this->variableInformation, *this->evaluator); if (hasTransientAssignments) { this->transientVariableInformation.setDefaultValuesInEvaluator(*this->evaluator); @@ -608,9 +640,7 @@ namespace storm { } } - // Create the state-action reward for the newly created choice. Note that edge assignments are all transient and we assume that no transient variables occur on the rhs of transient assignments, i.e., the assignment level does not matter here - auto valueIt = stateActionRewards.begin(); - performTransientAssignments(edge.getAssignments().getTransientAssignments(), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } ); + // Add the state action rewards choice.addRewards(std::move(stateActionRewards)); if (this->options.isExplorationChecksSet()) { @@ -651,20 +681,39 @@ namespace storm { template void JaniNextStateGenerator::generateSynchronizedDistribution(storm::storage::BitVector const& state, AutomataEdgeSets const& edgeCombination, std::vector const& iteratorList, storm::builder::jit::Distribution& distribution, std::vector& stateActionRewards, EdgeIndexSet& edgeIndices, StateToIdCallback stateToIdCallback) { - int64_t lowestLevel = std::numeric_limits::max(); - int64_t highestLevel = std::numeric_limits::min(); + // Collect some information of the edges. + int64_t lowestDestinationAssignmentLevel = std::numeric_limits::max(); + int64_t highestDestinationAssignmentLevel = std::numeric_limits::min(); + int64_t lowestEdgeAssignmentLevel = std::numeric_limits::max(); + int64_t highestEdgeAssignmentLevel = std::numeric_limits::min(); uint64_t numDestinations = 1; for (uint_fast64_t i = 0; i < iteratorList.size(); ++i) { if (this->getOptions().isBuildChoiceOriginsSet()) { edgeIndices.insert(model.encodeAutomatonAndEdgeIndices(edgeCombination[i].first, iteratorList[i]->first)); } storm::jani::Edge const& edge = *iteratorList[i]->second; - lowestLevel = std::min(lowestLevel, edge.getLowestAssignmentLevel()); - highestLevel = std::max(highestLevel, edge.getHighestAssignmentLevel()); + lowestDestinationAssignmentLevel = std::min(lowestDestinationAssignmentLevel, edge.getLowestAssignmentLevel()); + highestDestinationAssignmentLevel = std::max(highestDestinationAssignmentLevel, edge.getHighestAssignmentLevel()); + lowestEdgeAssignmentLevel = std::min(lowestEdgeAssignmentLevel, edge.getAssignments().getLowestLevel(true)); + highestEdgeAssignmentLevel = std::max(highestEdgeAssignmentLevel, edge.getAssignments().getHighestLevel(true)); numDestinations *= edge.getNumberOfDestinations(); } - std::vector destinationRewards; + + // Perform the edge assignments (if there are any) TransientVariableValuation transientVariableValuation; + if (lowestEdgeAssignmentLevel <= highestEdgeAssignmentLevel) { + for (int64_t assignmentLevel = lowestEdgeAssignmentLevel; assignmentLevel <= highestEdgeAssignmentLevel; ++assignmentLevel) { + transientVariableValuation.clear(); + for (uint_fast64_t i = 0; i < iteratorList.size(); ++i) { + storm::jani::Edge const& edge = *iteratorList[i]->second; + applyTransientUpdate(transientVariableValuation, edge.getAssignments().getTransientAssignments(assignmentLevel), *this->evaluator); + } + transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet()); + } + addEvaluatedRewardExpressions(stateActionRewards, storm::utility::one()); + transientVariableInformation.setDefaultValuesInEvaluator(*this->evaluator); + } + std::vector destinations; std::vector locationVars; destinations.reserve(iteratorList.size()); @@ -674,7 +723,6 @@ namespace storm { // First assignment level destinations.clear(); locationVars.clear(); - destinationRewards.assign(rewardVariables.size(), storm::utility::zero()); transientVariableValuation.clear(); CompressedState successorState = state; ValueType successorProbability = storm::utility::one(); @@ -697,50 +745,48 @@ namespace storm { break; } - successorState = applyUpdate(successorState, *destinations.back(), *locationVars.back(), lowestLevel, *this->evaluator); - applyTransientUpdate(transientVariableValuation, *destinations.back(), lowestLevel, *this->evaluator); + successorState = applyUpdate(successorState, *destinations.back(), *locationVars.back(), lowestDestinationAssignmentLevel, *this->evaluator); + applyTransientUpdate(transientVariableValuation, destinations.back()->getOrderedAssignments().getTransientAssignments(lowestDestinationAssignmentLevel), *this->evaluator); // add the reward for this destination to the destination rewards - auto valueIt = destinationRewards.begin(); - performTransientAssignments(destinations.back()->getOrderedAssignments().getTransientAssignments(lowestLevel), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } ); + //auto valueIt = destinationRewards.begin(); + //performTransientAssignments(destinations.back()->getOrderedAssignments().getTransientAssignments(lowestDestinationAssignmentLevel), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } ); } + if (!storm::utility::isZero(successorProbability)) { - if (lowestLevel < highestLevel) { - int64_t assignmentLevel = lowestLevel; - // remaining assignment levels - while (assignmentLevel < highestLevel) { - ++assignmentLevel; - unpackStateIntoEvaluator(successorState, this->variableInformation, *this->evaluator); - transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet()); - transientVariableValuation.clear(); - - auto locationVarIt = locationVars.begin(); - for (auto const& destPtr : destinations) { - successorState = applyUpdate(successorState, *destPtr, **locationVarIt, assignmentLevel, *this->evaluator); - applyTransientUpdate(transientVariableValuation, *destinations.back(), lowestLevel, *this->evaluator); - // add the reward for this destination to the destination rewards - auto valueIt = destinationRewards.begin(); - performTransientAssignments(destinations.back()->getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } ); - ++locationVarIt; - } + bool evaluatorChanged = false; + // remaining assignment levels (if there are any) + for (int64_t assignmentLevel = lowestDestinationAssignmentLevel + 1; assignmentLevel <= highestDestinationAssignmentLevel; ++assignmentLevel) { + unpackStateIntoEvaluator(successorState, this->variableInformation, *this->evaluator); + transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet()); + transientVariableValuation.clear(); + evaluatorChanged = true; + auto locationVarIt = locationVars.begin(); + for (auto const& destPtr : destinations) { + successorState = applyUpdate(successorState, *destPtr, **locationVarIt, assignmentLevel, *this->evaluator); + applyTransientUpdate(transientVariableValuation, destinations.back()->getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator); + // add the reward for this destination to the destination rewards + // auto valueIt = destinationRewards.begin(); + // performTransientAssignments(destinations.back()->getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } ); + ++locationVarIt; } + } + if (!transientVariableValuation.empty()) { + evaluatorChanged = true; + transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet()); + } + addEvaluatedRewardExpressions(stateActionRewards, successorProbability); + if (evaluatorChanged) { // Restore the old state information unpackStateIntoEvaluator(state, this->variableInformation, *this->evaluator); this->transientVariableInformation.setDefaultValuesInEvaluator(*this->evaluator); } + StateType id = stateToIdCallback(successorState); distribution.add(id, successorProbability); - storm::utility::vector::addScaledVector(stateActionRewards, destinationRewards, successorProbability); } } - - for (uint_fast64_t i = 0; i < iteratorList.size(); ++i) { - storm::jani::Edge const& edge = *iteratorList[i]->second; - // Add the state-action reward for the newly created choice. Note that edge assignments are all transient and we assume that no transient variables occur on the rhs of transient assignments, i.e., the assignment level does not matter here - auto valueIt = stateActionRewards.begin(); - performTransientAssignments(edge.getAssignments().getTransientAssignments(), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } ); - } } template @@ -980,6 +1026,27 @@ namespace storm { } } + + template + std::vector JaniNextStateGenerator::evaluateRewardExpressions() const { + std::vector result; + result.reserve(rewardVariables.size()); + for (auto const& rewardExpression : rewardVariables) { + result.push_back(this->evaluator->asRational(rewardExpression.getExpression())); + } + return result; + } + + template + void JaniNextStateGenerator::addEvaluatedRewardExpressions(std::vector& rewards, ValueType const& factor) const { + assert(rewards.size() == rewardVariables.size()); + auto rewIt = rewards.begin(); + for (auto const& rewardExpression : rewardVariables) { + (*rewIt) += factor * this->evaluator->asRational(rewardExpression.getExpression()); + ++rewIt; + } + } + template void JaniNextStateGenerator::buildRewardModelInformation() { // Prepare all reward model information structs. diff --git a/src/storm/generator/JaniNextStateGenerator.h b/src/storm/generator/JaniNextStateGenerator.h index 1dbc4440e..b4d587895 100644 --- a/src/storm/generator/JaniNextStateGenerator.h +++ b/src/storm/generator/JaniNextStateGenerator.h @@ -89,7 +89,7 @@ namespace storm { * @params assignmentLevel The assignmentLevel that is to be considered for the update. * @return The resulting state. */ - void applyTransientUpdate(TransientVariableValuation& transientValuation, storm::jani::EdgeDestination const& destination, int64_t assignmentlevel, storm::expressions::ExpressionEvaluator const& expressionEvaluator); + void applyTransientUpdate(TransientVariableValuation& transientValuation, storm::jani::detail::ConstAssignments const& transientAssignments, storm::expressions::ExpressionEvaluator const& expressionEvaluator); /*! * Retrieves all choices possible from the given state. @@ -128,6 +128,16 @@ namespace storm { */ void performTransientAssignments(storm::jani::detail::ConstAssignments const& transientAssignments, storm::expressions::ExpressionEvaluator const& expressionEvaluator, std::function const& callback); + /*! + * Evaluates the reward expressions using the current evaluator + */ + std::vector evaluateRewardExpressions() const; + + /*! + * Evaluates the reward expressions using the current evaluator, multiplies them by the given factor and adds it to the given vector. + */ + void addEvaluatedRewardExpressions(std::vector& rewards, ValueType const& factor) const; + /*! * Builds the information structs for the reward models. */ diff --git a/src/storm/generator/TransientVariableInformation.h b/src/storm/generator/TransientVariableInformation.h index 8300b6702..8f221c734 100644 --- a/src/storm/generator/TransientVariableInformation.h +++ b/src/storm/generator/TransientVariableInformation.h @@ -64,6 +64,10 @@ namespace storm { rationalValues.clear(); } + bool empty() const { + return booleanValues.empty() && integerValues.empty() && rationalValues.empty(); + } + void setInEvaluator(storm::expressions::ExpressionEvaluator& evaluator, bool explorationChecks) const { for (auto const& varValue : booleanValues) { evaluator.setBooleanValue(varValue.first->variable, varValue.second);