Browse Source

jani next state generator now sets values of transient variables in the evaluator to evaluate reward expressions

tempestpy_adaptions
TimQu 6 years ago
parent
commit
4cda27f094
  1. 175
      src/storm/generator/JaniNextStateGenerator.cpp
  2. 12
      src/storm/generator/JaniNextStateGenerator.h
  3. 4
      src/storm/generator/TransientVariableInformation.h

175
src/storm/generator/JaniNextStateGenerator.cpp

@ -80,6 +80,7 @@ namespace storm {
// Create a proper evalator.
this->evaluator = std::make_unique<storm::expressions::ExpressionEvaluator<ValueType>>(model.getManager());
this->transientVariableInformation.setDefaultValuesInEvaluator(*this->evaluator);
if (this->options.isBuildAllRewardModelsSet()) {
for (auto const& variable : model.getGlobalVariables()) {
@ -383,12 +384,10 @@ namespace storm {
}
template<typename ValueType, typename StateType>
void JaniNextStateGenerator<ValueType, StateType>::applyTransientUpdate(TransientVariableValuation<ValueType>& transientValuation, storm::jani::EdgeDestination const& destination, int64_t assignmentLevel, storm::expressions::ExpressionEvaluator<ValueType> const& expressionEvaluator) {
void JaniNextStateGenerator<ValueType, StateType>::applyTransientUpdate(TransientVariableValuation<ValueType>& transientValuation, storm::jani::detail::ConstAssignments const& transientAssignments, storm::expressions::ExpressionEvaluator<ValueType> const& expressionEvaluator) {
// Perform the assignments.
auto const& assignments = destination.getOrderedAssignments().getTransientAssignments(assignmentLevel);
auto assignmentIt = assignments.begin();
auto assignmentIte = assignments.end();
auto assignmentIt = transientAssignments.begin();
auto assignmentIte = transientAssignments.end();
// Iterate over all boolean assignments and carry them out.
auto boolIt = this->transientVariableInformation.booleanVariableInformation.begin();
@ -459,15 +458,22 @@ namespace storm {
// need the state rewards then.
std::vector<ValueType> stateRewards(this->rewardVariables.size(), storm::utility::zero<ValueType>());
uint64_t automatonIndex = 0;
TransientVariableValuation<ValueType> transientVariableValuation;
for (auto const& automatonRef : this->parallelAutomata) {
auto const& automaton = automatonRef.get();
uint64_t currentLocationIndex = locations[automatonIndex];
storm::jani::Location const& location = automaton.getLocation(currentLocationIndex);
STORM_LOG_ASSERT(!location.getAssignments().hasMultipleLevels(true), "Indexed assignments at locations are not supported in the jani standard.");
applyTransientUpdate(transientVariableValuation, location.getAssignments().getTransientAssignments(), *this->evaluator);
auto valueIt = stateRewards.begin();
performTransientAssignments(location.getAssignments().getTransientAssignments(), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } );
++automatonIndex;
}
result.addStateRewards(std::move(stateRewards));
transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet());
//result.addStateRewards(std::move(stateRewards));
result.addStateRewards(evaluateRewardExpressions());
this->transientVariableInformation.setDefaultValuesInEvaluator(*this->evaluator);
// If a terminal expression was set and we must not expand this state, return now.
if (!this->terminalStates.empty()) {
@ -552,8 +558,24 @@ namespace storm {
}
Choice<ValueType> choice(edge.getActionIndex(), static_cast<bool>(exitRate));
// Perform the transient edge assignments and create the state action rewards
TransientVariableValuation<ValueType> transientVariableValuation;
std::vector<ValueType> stateActionRewards(rewardVariables.size(), storm::utility::zero<ValueType>());
if (!edge.getAssignments().empty()) {
for (int64_t assignmentLevel = edge.getAssignments().getLowestLevel(true); assignmentLevel <= edge.getAssignments().getHighestLevel(true); ++assignmentLevel) {
transientVariableValuation.clear();
applyTransientUpdate(transientVariableValuation, edge.getAssignments().getTransientAssignments(assignmentLevel), *this->evaluator);
transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet());
}
}
std::vector<ValueType> stateActionRewards = evaluateRewardExpressions();
if (!edge.getAssignments().empty()) {
transientVariableInformation.setDefaultValuesInEvaluator(*this->evaluator);
}
auto valueIt = stateActionRewards.begin();
// performTransientAssignments(edge.getAssignments().getTransientAssignments(), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } );
// Iterate over all updates of the current command.
ValueType probabilitySum = storm::utility::zero<ValueType>();
@ -561,6 +583,7 @@ namespace storm {
ValueType probability = this->evaluator->asRational(destination.getProbability());
if (probability != storm::utility::zero<ValueType>()) {
bool evaluatorChanged = false;
// Obtain target state index and add it to the list of known states. If it has not yet been
// seen, we also add it to the set of states that have yet to be explored.
int64_t assignmentLevel = edge.getLowestAssignmentLevel(); // Might be the largest possible integer, if there is no assignment
@ -568,28 +591,37 @@ namespace storm {
bool hasTransientAssignments = destination.hasTransientAssignment();
CompressedState newState = applyUpdate(state, destination, this->variableInformation.locationVariables[automatonIndex], assignmentLevel, *this->evaluator);
if (hasTransientAssignments) {
transientVariableValuation.clear();
STORM_LOG_ASSERT(this->options.isScaleAndLiftTransitionRewardsSet(), "Transition rewards are not supported and scaling to action rewards is disabled.");
applyTransientUpdate(transientVariableValuation, destination, assignmentLevel, *this->evaluator);
transientVariableValuation.clear();
applyTransientUpdate(transientVariableValuation, destination.getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator);
transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet());
evaluatorChanged = true;
// Create the rewards for this destination
auto valueIt = stateActionRewards.begin();
performTransientAssignments(destination.getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator, [&] (ValueType const& value) { *valueIt += (value * probability); ++valueIt; } );
// performTransientAssignments(destination.getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator, [&] (ValueType const& value) { *valueIt += (value * probability); ++valueIt; } );
}
if (assignmentLevel < highestLevel) {
while (assignmentLevel < highestLevel) {
++assignmentLevel;
unpackStateIntoEvaluator(newState, this->variableInformation, *this->evaluator);
transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet());
transientVariableValuation.clear();
evaluatorChanged = true;
newState = applyUpdate(newState, destination, this->variableInformation.locationVariables[automatonIndex], assignmentLevel, *this->evaluator);
if (hasTransientAssignments) {
applyTransientUpdate(transientVariableValuation, destination, assignmentLevel, *this->evaluator);
transientVariableValuation.clear();
applyTransientUpdate(transientVariableValuation, destination.getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator);
transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet());
evaluatorChanged = true;
// update the rewards for this destination
auto valueIt = stateActionRewards.begin();
performTransientAssignments(destination.getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator, [&] (ValueType const& value) { *valueIt += (value * probability); ++valueIt; } );
// performTransientAssignments(destination.getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator, [&] (ValueType const& value) { *valueIt += (value * probability); ++valueIt; } );
}
}
// Restore the old information
}
addEvaluatedRewardExpressions(stateActionRewards, probability);
if (evaluatorChanged) {
// Restore the old variable valuation
unpackStateIntoEvaluator(state, this->variableInformation, *this->evaluator);
if (hasTransientAssignments) {
this->transientVariableInformation.setDefaultValuesInEvaluator(*this->evaluator);
@ -608,9 +640,7 @@ namespace storm {
}
}
// Create the state-action reward for the newly created choice. Note that edge assignments are all transient and we assume that no transient variables occur on the rhs of transient assignments, i.e., the assignment level does not matter here
auto valueIt = stateActionRewards.begin();
performTransientAssignments(edge.getAssignments().getTransientAssignments(), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } );
// Add the state action rewards
choice.addRewards(std::move(stateActionRewards));
if (this->options.isExplorationChecksSet()) {
@ -651,20 +681,39 @@ namespace storm {
template<typename ValueType, typename StateType>
void JaniNextStateGenerator<ValueType, StateType>::generateSynchronizedDistribution(storm::storage::BitVector const& state, AutomataEdgeSets const& edgeCombination, std::vector<EdgeSetWithIndices::const_iterator> const& iteratorList, storm::builder::jit::Distribution<StateType, ValueType>& distribution, std::vector<ValueType>& stateActionRewards, EdgeIndexSet& edgeIndices, StateToIdCallback stateToIdCallback) {
int64_t lowestLevel = std::numeric_limits<int64_t>::max();
int64_t highestLevel = std::numeric_limits<int64_t>::min();
// Collect some information of the edges.
int64_t lowestDestinationAssignmentLevel = std::numeric_limits<int64_t>::max();
int64_t highestDestinationAssignmentLevel = std::numeric_limits<int64_t>::min();
int64_t lowestEdgeAssignmentLevel = std::numeric_limits<int64_t>::max();
int64_t highestEdgeAssignmentLevel = std::numeric_limits<int64_t>::min();
uint64_t numDestinations = 1;
for (uint_fast64_t i = 0; i < iteratorList.size(); ++i) {
if (this->getOptions().isBuildChoiceOriginsSet()) {
edgeIndices.insert(model.encodeAutomatonAndEdgeIndices(edgeCombination[i].first, iteratorList[i]->first));
}
storm::jani::Edge const& edge = *iteratorList[i]->second;
lowestLevel = std::min(lowestLevel, edge.getLowestAssignmentLevel());
highestLevel = std::max(highestLevel, edge.getHighestAssignmentLevel());
lowestDestinationAssignmentLevel = std::min(lowestDestinationAssignmentLevel, edge.getLowestAssignmentLevel());
highestDestinationAssignmentLevel = std::max(highestDestinationAssignmentLevel, edge.getHighestAssignmentLevel());
lowestEdgeAssignmentLevel = std::min(lowestEdgeAssignmentLevel, edge.getAssignments().getLowestLevel(true));
highestEdgeAssignmentLevel = std::max(highestEdgeAssignmentLevel, edge.getAssignments().getHighestLevel(true));
numDestinations *= edge.getNumberOfDestinations();
}
std::vector<ValueType> destinationRewards;
// Perform the edge assignments (if there are any)
TransientVariableValuation<ValueType> transientVariableValuation;
if (lowestEdgeAssignmentLevel <= highestEdgeAssignmentLevel) {
for (int64_t assignmentLevel = lowestEdgeAssignmentLevel; assignmentLevel <= highestEdgeAssignmentLevel; ++assignmentLevel) {
transientVariableValuation.clear();
for (uint_fast64_t i = 0; i < iteratorList.size(); ++i) {
storm::jani::Edge const& edge = *iteratorList[i]->second;
applyTransientUpdate(transientVariableValuation, edge.getAssignments().getTransientAssignments(assignmentLevel), *this->evaluator);
}
transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet());
}
addEvaluatedRewardExpressions(stateActionRewards, storm::utility::one<ValueType>());
transientVariableInformation.setDefaultValuesInEvaluator(*this->evaluator);
}
std::vector<storm::jani::EdgeDestination const*> destinations;
std::vector<LocationVariableInformation const*> locationVars;
destinations.reserve(iteratorList.size());
@ -674,7 +723,6 @@ namespace storm {
// First assignment level
destinations.clear();
locationVars.clear();
destinationRewards.assign(rewardVariables.size(), storm::utility::zero<ValueType>());
transientVariableValuation.clear();
CompressedState successorState = state;
ValueType successorProbability = storm::utility::one<ValueType>();
@ -697,50 +745,48 @@ namespace storm {
break;
}
successorState = applyUpdate(successorState, *destinations.back(), *locationVars.back(), lowestLevel, *this->evaluator);
applyTransientUpdate(transientVariableValuation, *destinations.back(), lowestLevel, *this->evaluator);
successorState = applyUpdate(successorState, *destinations.back(), *locationVars.back(), lowestDestinationAssignmentLevel, *this->evaluator);
applyTransientUpdate(transientVariableValuation, destinations.back()->getOrderedAssignments().getTransientAssignments(lowestDestinationAssignmentLevel), *this->evaluator);
// add the reward for this destination to the destination rewards
auto valueIt = destinationRewards.begin();
performTransientAssignments(destinations.back()->getOrderedAssignments().getTransientAssignments(lowestLevel), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } );
//auto valueIt = destinationRewards.begin();
//performTransientAssignments(destinations.back()->getOrderedAssignments().getTransientAssignments(lowestDestinationAssignmentLevel), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } );
}
if (!storm::utility::isZero(successorProbability)) {
if (lowestLevel < highestLevel) {
int64_t assignmentLevel = lowestLevel;
// remaining assignment levels
while (assignmentLevel < highestLevel) {
++assignmentLevel;
unpackStateIntoEvaluator(successorState, this->variableInformation, *this->evaluator);
transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet());
transientVariableValuation.clear();
auto locationVarIt = locationVars.begin();
for (auto const& destPtr : destinations) {
successorState = applyUpdate(successorState, *destPtr, **locationVarIt, assignmentLevel, *this->evaluator);
applyTransientUpdate(transientVariableValuation, *destinations.back(), lowestLevel, *this->evaluator);
// add the reward for this destination to the destination rewards
auto valueIt = destinationRewards.begin();
performTransientAssignments(destinations.back()->getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } );
++locationVarIt;
}
bool evaluatorChanged = false;
// remaining assignment levels (if there are any)
for (int64_t assignmentLevel = lowestDestinationAssignmentLevel + 1; assignmentLevel <= highestDestinationAssignmentLevel; ++assignmentLevel) {
unpackStateIntoEvaluator(successorState, this->variableInformation, *this->evaluator);
transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet());
transientVariableValuation.clear();
evaluatorChanged = true;
auto locationVarIt = locationVars.begin();
for (auto const& destPtr : destinations) {
successorState = applyUpdate(successorState, *destPtr, **locationVarIt, assignmentLevel, *this->evaluator);
applyTransientUpdate(transientVariableValuation, destinations.back()->getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator);
// add the reward for this destination to the destination rewards
// auto valueIt = destinationRewards.begin();
// performTransientAssignments(destinations.back()->getOrderedAssignments().getTransientAssignments(assignmentLevel), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } );
++locationVarIt;
}
}
if (!transientVariableValuation.empty()) {
evaluatorChanged = true;
transientVariableValuation.setInEvaluator(*this->evaluator, this->getOptions().isExplorationChecksSet());
}
addEvaluatedRewardExpressions(stateActionRewards, successorProbability);
if (evaluatorChanged) {
// Restore the old state information
unpackStateIntoEvaluator(state, this->variableInformation, *this->evaluator);
this->transientVariableInformation.setDefaultValuesInEvaluator(*this->evaluator);
}
StateType id = stateToIdCallback(successorState);
distribution.add(id, successorProbability);
storm::utility::vector::addScaledVector(stateActionRewards, destinationRewards, successorProbability);
}
}
for (uint_fast64_t i = 0; i < iteratorList.size(); ++i) {
storm::jani::Edge const& edge = *iteratorList[i]->second;
// Add the state-action reward for the newly created choice. Note that edge assignments are all transient and we assume that no transient variables occur on the rhs of transient assignments, i.e., the assignment level does not matter here
auto valueIt = stateActionRewards.begin();
performTransientAssignments(edge.getAssignments().getTransientAssignments(), *this->evaluator, [&valueIt] (ValueType const& value) { *valueIt += value; ++valueIt; } );
}
}
template<typename ValueType, typename StateType>
@ -980,6 +1026,27 @@ namespace storm {
}
}
template<typename ValueType, typename StateType>
std::vector<ValueType> JaniNextStateGenerator<ValueType, StateType>::evaluateRewardExpressions() const {
std::vector<ValueType> result;
result.reserve(rewardVariables.size());
for (auto const& rewardExpression : rewardVariables) {
result.push_back(this->evaluator->asRational(rewardExpression.getExpression()));
}
return result;
}
template<typename ValueType, typename StateType>
void JaniNextStateGenerator<ValueType, StateType>::addEvaluatedRewardExpressions(std::vector<ValueType>& rewards, ValueType const& factor) const {
assert(rewards.size() == rewardVariables.size());
auto rewIt = rewards.begin();
for (auto const& rewardExpression : rewardVariables) {
(*rewIt) += factor * this->evaluator->asRational(rewardExpression.getExpression());
++rewIt;
}
}
template<typename ValueType, typename StateType>
void JaniNextStateGenerator<ValueType, StateType>::buildRewardModelInformation() {
// Prepare all reward model information structs.

12
src/storm/generator/JaniNextStateGenerator.h

@ -89,7 +89,7 @@ namespace storm {
* @params assignmentLevel The assignmentLevel that is to be considered for the update.
* @return The resulting state.
*/
void applyTransientUpdate(TransientVariableValuation<ValueType>& transientValuation, storm::jani::EdgeDestination const& destination, int64_t assignmentlevel, storm::expressions::ExpressionEvaluator<ValueType> const& expressionEvaluator);
void applyTransientUpdate(TransientVariableValuation<ValueType>& transientValuation, storm::jani::detail::ConstAssignments const& transientAssignments, storm::expressions::ExpressionEvaluator<ValueType> const& expressionEvaluator);
/*!
* Retrieves all choices possible from the given state.
@ -128,6 +128,16 @@ namespace storm {
*/
void performTransientAssignments(storm::jani::detail::ConstAssignments const& transientAssignments, storm::expressions::ExpressionEvaluator<ValueType> const& expressionEvaluator, std::function<void (ValueType const&)> const& callback);
/*!
* Evaluates the reward expressions using the current evaluator
*/
std::vector<ValueType> evaluateRewardExpressions() const;
/*!
* Evaluates the reward expressions using the current evaluator, multiplies them by the given factor and adds it to the given vector.
*/
void addEvaluatedRewardExpressions(std::vector<ValueType>& rewards, ValueType const& factor) const;
/*!
* Builds the information structs for the reward models.
*/

4
src/storm/generator/TransientVariableInformation.h

@ -64,6 +64,10 @@ namespace storm {
rationalValues.clear();
}
bool empty() const {
return booleanValues.empty() && integerValues.empty() && rationalValues.empty();
}
void setInEvaluator(storm::expressions::ExpressionEvaluator<ValueType>& evaluator, bool explorationChecks) const {
for (auto const& varValue : booleanValues) {
evaluator.setBooleanValue(varValue.first->variable, varValue.second);
Loading…
Cancel
Save