#include "src/modelchecker/reachability/SparseMdpLearningModelChecker.h"
#include "src/storage/SparseMatrix.h"
#include "src/storage/MaximalEndComponentDecomposition.h"
#include "src/logic/FragmentSpecification.h"
#include "src/utility/prism.h"
#include "src/modelchecker/results/ExplicitQuantitativeCheckResult.h"
#include "src/models/sparse/StandardRewardModel.h"
#include "src/settings/SettingsManager.h"
#include "src/settings/modules/GeneralSettings.h"
#include "src/settings/modules/LearningSettings.h"
#include "src/utility/macros.h"
#include "src/exceptions/InvalidOperationException.h"
#include "src/exceptions/InvalidPropertyException.h"
#include "src/exceptions/NotSupportedException.h"
namespace storm {
namespace modelchecker {
template<typename ValueType>
SparseMdpLearningModelChecker<ValueType>::SparseMdpLearningModelChecker(storm::prism::Program const& program, boost::optional<std::map<storm::expressions::Variable, storm::expressions::Expression>> const& constantDefinitions) : program(storm::utility::prism::preprocessProgram<ValueType>(program, constantDefinitions)), variableInformation(this->program), randomGenerator(std::chrono::system_clock::now().time_since_epoch().count()), comparator() {
// Intentionally left empty.
}
template<typename ValueType>
bool SparseMdpLearningModelChecker<ValueType>::canHandle(CheckTask<storm::logic::Formula> const& checkTask) const {
storm::logic::Formula const& formula = checkTask.getFormula();
storm::logic::FragmentSpecification fragment = storm::logic::propositional().setProbabilityOperatorsAllowed(true).setReachabilityProbabilityFormulasAllowed(true).setNestedOperatorsAllowed(false);
return formula.isInFragment(fragment) && checkTask.isOnlyInitialStatesRelevantSet();
}
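// Computes (bounds on) the reachability probability of the given eventually formula for the initial state by
// exploring only the part of the state space that is reachable via sampled paths.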
template<typename ValueType>
std::unique_ptr<CheckResult> SparseMdpLearningModelChecker<ValueType>::computeReachabilityProbabilities(CheckTask<storm::logic::EventuallyFormula> const& checkTask) {
storm::logic::EventuallyFormula const& eventuallyFormula = checkTask.getFormula();
storm::logic::Formula const& subformula = eventuallyFormula.getSubformula();
STORM_LOG_THROW(program.isDeterministicModel() || checkTask.isOptimizationDirectionSet(), storm::exceptions::InvalidPropertyException, "For nondeterministic systems, an optimization direction (min/max) must be given in the property.");
StateGeneration stateGeneration(program, variableInformation, getTargetStateExpression(subformula));
ExplorationInformation explorationInformation(variableInformation.getTotalBitOffset(true), storm::settings::learningSettings().isLocalECDetectionSet());
explorationInformation.optimizationDirection = checkTask.isOptimizationDirectionSet() ? checkTask.getOptimizationDirection() : storm::OptimizationDirection::Maximize;
// The first row group starts at action 0.
explorationInformation.newRowGroup(0);
// Create a callback for the next-state generator to enable it to request the index of states.
stateGeneration.stateToIdCallback = createStateToIdCallback(explorationInformation);
// Compute and return result.
std::tuple<StateType, ValueType, ValueType> boundsForInitialState = performLearningProcedure(stateGeneration, explorationInformation);
return std::make_unique<ExplicitQuantitativeCheckResult<ValueType>>(std::get<0>(boundsForInitialState), std::get<1>(boundsForInitialState));
}
template<typename ValueType>
storm::expressions::Expression SparseMdpLearningModelChecker<ValueType>::getTargetStateExpression(storm::logic::Formula const& subformula) const {
std::shared_ptr<storm::logic::Formula> preparedSubformula = subformula.substitute(program.getLabelToExpressionMapping());
storm::expressions::Expression result;
try {
result = preparedSubformula->toExpression();
} catch(storm::exceptions::InvalidOperationException const& e) {
STORM_LOG_THROW(false, storm::exceptions::InvalidPropertyException, "The property refers to unknown labels.");
}
return result;
}
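// Creates the callback the next-state generator uses to translate a compressed state into a state index.
// States that have not been seen before are registered and queued as unexplored.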
template<typename ValueType>
std::function<typename SparseMdpLearningModelChecker<ValueType>::StateType (storm::generator::CompressedState const&)> SparseMdpLearningModelChecker<ValueType>::createStateToIdCallback(ExplorationInformation& explorationInformation) const {
return [&explorationInformation] (storm::generator::CompressedState const& state) -> StateType {
StateType newIndex = explorationInformation.stateStorage.numberOfStates;
// Check whether the state was already registered.
std::pair<uint32_t, std::size_t> actualIndexBucketPair = explorationInformation.stateStorage.stateToId.findOrAddAndGetBucket(state, newIndex);
if (actualIndexBucketPair.first == newIndex) {
explorationInformation.addUnexploredState(state);
}
return actualIndexBucketPair.first;
};
}
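// Runs the main learning loop: paths are repeatedly sampled from the (single) initial state and the bounds
// along each path are updated until the lower and upper bound of the initial state coincide.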
template<typename ValueType>
std::tuple<typename SparseMdpLearningModelChecker<ValueType>::StateType, ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::performLearningProcedure(StateGeneration& stateGeneration, ExplorationInformation& explorationInformation) const {
// Generate the initial state so we know where to start the simulation.
explorationInformation.setInitialStates(stateGeneration.getInitialStates());
STORM_LOG_THROW(explorationInformation.getNumberOfInitialStates() == 1, storm::exceptions::NotSupportedException, "Currently only models with one initial state are supported by the learning engine.");
StateType initialStateIndex = explorationInformation.getFirstInitialState();
// Create a structure that holds the bounds for the states and actions.
BoundValues bounds;
// Create a stack that is used to track the path we sampled.
StateActionStack stack;
// Now perform the actual sampling.
Statistics stats;
bool convergenceCriterionMet = false;
while (!convergenceCriterionMet) {
bool result = samplePathFromInitialState(stateGeneration, explorationInformation, stack, bounds, stats);
// If a terminal state was found, we update the probabilities along the path contained in the stack.
if (result) {
// Update the bounds along the path to the terminal state.
STORM_LOG_TRACE("Found terminal state, updating probabilities along path.");
updateProbabilityBoundsAlongSampledPath(stack, explorationInformation, bounds);
} else {
// If no terminal state was found, the search was aborted, possibly because of an EC detection. In this
// case, we cannot update the probabilities.
STORM_LOG_TRACE("Did not find terminal state.");
}
STORM_LOG_DEBUG("Discovered states: " << explorationInformation.getNumberOfDiscoveredStates() << " (" << stats.numberOfExploredStates << " explored, " << explorationInformation.getNumberOfUnexploredStates() << " unexplored).");
STORM_LOG_DEBUG("Value of initial state is in [" << bounds.getLowerBoundForState(initialStateIndex, explorationInformation) << ", " << bounds.getUpperBoundForState(initialStateIndex, explorationInformation) << "].");
ValueType difference = bounds.getDifferenceOfStateBounds(initialStateIndex, explorationInformation);
STORM_LOG_DEBUG("Difference after iteration " << stats.iterations << " is " << difference << ".");
convergenceCriterionMet = comparator.isZero(difference);
++stats.iterations;
}
if (storm::settings::generalSettings().isShowStatisticsSet()) {
std::cout << std::endl << "Learning summary -------------------------" << std::endl;
std::cout << "Discovered states: " << explorationInformation.getNumberOfDiscoveredStates() << " (" << stats.numberOfExploredStates << " explored, " << explorationInformation.getNumberOfUnexploredStates() << " unexplored, " << stats.numberOfTargetStates << " target)" << std::endl;
std::cout << "Sampled paths: " << stats.iterations << std::endl;
std::cout << "Maximal path length: " << stats.maxPathLength << std::endl;
std::cout << "EC detections: " << stats.ecDetections << " (" << stats.failedEcDetections << " failed, " << stats.totalNumberOfEcDetected << " EC(s) detected)" << std::endl;
}
return std::make_tuple(initialStateIndex, bounds.getLowerBoundForState(initialStateIndex, explorationInformation), bounds.getUpperBoundForState(initialStateIndex, explorationInformation));
}
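// Samples one path starting in the initial state, exploring previously unexplored states on the fly. Returns
// true iff the path ended in a terminal (target or rejecting) state; returns false if the search was aborted,
// e.g. after a successful EC detection.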
template<typename ValueType>
bool SparseMdpLearningModelChecker<ValueType>::samplePathFromInitialState(StateGeneration& stateGeneration, ExplorationInformation& explorationInformation, StateActionStack& stack, BoundValues& bounds, Statistics& stats) const {
// Start the search from the initial state.
stack.push_back(std::make_pair(explorationInformation.getFirstInitialState(), 0));
// As long as we didn't find a terminal (accepting or rejecting) state in the search, sample a new successor.
bool foundTerminalState = false;
while (!foundTerminalState) {
StateType const& currentStateId = stack.back().first;
STORM_LOG_TRACE("State on top of stack is: " << currentStateId << ".");
// If the state is not yet explored, we need to retrieve its behaviors.
auto unexploredIt = explorationInformation.unexploredStates.find(currentStateId);
if (unexploredIt != explorationInformation.unexploredStates.end()) {
STORM_LOG_TRACE("State was not yet explored.");
// Explore the previously unexplored state.
storm::generator::CompressedState const& compressedState = unexploredIt->second;
foundTerminalState = exploreState(stateGeneration, currentStateId, compressedState, explorationInformation, bounds, stats);
explorationInformation.unexploredStates.erase(unexploredIt);
} else {
// If the state was already explored, we check whether it is a terminal state or not.
if (explorationInformation.isTerminal(currentStateId)) {
STORM_LOG_TRACE("Found already explored terminal state: " << currentStateId << ".");
foundTerminalState = true;
}
}
// If the state was not a terminal state, we continue the path search and sample the next state.
if (!foundTerminalState) {
// At this point, we can be sure that the state was expanded and that we can sample according to the
// probabilities in the matrix.
uint32_t chosenAction = sampleActionOfState(currentStateId, explorationInformation, bounds);
stack.back().second = chosenAction;
STORM_LOG_TRACE("Sampled action " << chosenAction << " in state " << currentStateId << ".");
StateType successor = sampleSuccessorFromAction(chosenAction, explorationInformation);
STORM_LOG_TRACE("Sampled successor " << successor << " according to action " << chosenAction << " of state " << currentStateId << ".");
// Put the successor state and a dummy action on top of the stack.
stack.emplace_back(successor, 0);
stats.maxPathLength = std::max(stats.maxPathLength, stack.size());
// If the current path length exceeds the threshold and the model is a nondeterministic one, we
// perform an EC detection.
if (stack.size() > explorationInformation.getPathLengthUntilEndComponentDetection()) {
bool success = detectEndComponents(stack, explorationInformation, bounds, stats);
// We abort the search only if the detection actually found an EC.
if (success) {
// Abort the current search.
STORM_LOG_TRACE("Aborting the search after succesful EC detection.");
stack.clear();
break;
}
}
}
}
return foundTerminalState;
}
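// Explores the given unexplored state: target states and states without transitions to other states are marked
// as terminal, all other states have their behavior generated, inserted into the matrix and equipped with
// initial lower/upper bounds. Returns true iff the state is terminal.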
template<typename ValueType>
bool SparseMdpLearningModelChecker<ValueType>::exploreState(StateGeneration& stateGeneration, StateType const& currentStateId, storm::generator::CompressedState const& currentState, ExplorationInformation& explorationInformation, BoundValues& bounds, Statistics& stats) const {
bool isTerminalState = false;
bool isTargetState = false;
++stats.numberOfExploredStates;
// First, map the unexplored state to a new row group.
explorationInformation.assignStateToNextRowGroup(currentStateId);
STORM_LOG_TRACE("Assigning row group " << explorationInformation.getRowGroup(currentStateId) << " to state " << currentStateId << ".");
// Initialize the bounds, because some of the following computations rely on the values being available for
// all states that have been assigned to a row group.
bounds.initializeBoundsForNextState();
// Before generating the behavior of the state, we need to determine whether it's a target state that
// does not need to be expanded.
stateGeneration.generator.load(currentState);
if (stateGeneration.isTargetState()) {
++stats.numberOfTargetStates;
isTargetState = true;
isTerminalState = true;
} else {
STORM_LOG_TRACE("Exploring state.");
// If it needs to be expanded, we use the generator to retrieve the behavior of the new state.
storm::generator::StateBehavior<ValueType, StateType> behavior = stateGeneration.expand();
STORM_LOG_TRACE("State has " << behavior.getNumberOfChoices() << " choices.");
// Clumsily check whether we have found a state that forms a trivial BMEC, i.e. all of its choices lead only back to the state itself.
bool otherSuccessor = false;
for (auto const& choice : behavior) {
for (auto const& entry : choice) {
if (entry.first != currentStateId) {
otherSuccessor = true;
break;
}
}
}
isTerminalState = !otherSuccessor;
// If the state was neither a trivial (non-accepting) terminal state nor a target state, we
// need to store its behavior.
if (!isTerminalState) {
// Next, we insert the behavior into our matrix structure.
StateType startRow = explorationInformation.matrix.size();
explorationInformation.addRowsToMatrix(behavior.getNumberOfChoices());
ActionType currentAction = 0;
// Retrieve the lowest state bounds (wrt. the current optimization direction).
std::pair<ValueType, ValueType> stateBounds = getLowestBounds(explorationInformation.optimizationDirection);
for (auto const& choice : behavior) {
for (auto const& entry : choice) {
explorationInformation.getRowOfMatrix(startRow + currentAction).emplace_back(entry.first, entry.second);
}
std::pair<ValueType, ValueType> actionBounds = computeBoundsOfAction(startRow + currentAction, explorationInformation, bounds);
bounds.initializeBoundsForNextAction(actionBounds);
stateBounds = combineBounds(explorationInformation.optimizationDirection, stateBounds, actionBounds);
STORM_LOG_TRACE("Initializing bounds of action " << (startRow + currentAction) << " to " << bounds.getLowerBoundForAction(startRow + currentAction) << " and " << bounds.getUpperBoundForAction(startRow + currentAction) << ".");
++currentAction;
}
// Terminate the row group.
explorationInformation.rowGroupIndices.push_back(explorationInformation.matrix.size());
bounds.setBoundsForState(currentStateId, explorationInformation, stateBounds);
STORM_LOG_TRACE("Initializing bounds of state " << currentStateId << " to " << bounds.getLowerBoundForState(currentStateId, explorationInformation) << " and " << bounds.getUpperBoundForState(currentStateId, explorationInformation) << ".");
}
}
if (isTerminalState) {
STORM_LOG_TRACE("State does not need to be explored, because it is " << (isTargetState ? "a target state" : "a rejecting terminal state") << ".");
explorationInformation.addTerminalState(currentStateId);
if (isTargetState) {
bounds.setBoundsForState(currentStateId, explorationInformation, std::make_pair(storm::utility::one<ValueType>(), storm::utility::one<ValueType>()));
bounds.initializeBoundsForNextAction(std::make_pair(storm::utility::one<ValueType>(), storm::utility::one<ValueType>()));
} else {
bounds.setBoundsForState(currentStateId, explorationInformation, std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>()));
bounds.initializeBoundsForNextAction(std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>()));
}
// Increase the size of the matrix, but leave the row empty.
explorationInformation.addRowsToMatrix(1);
// Terminate the row group.
explorationInformation.newRowGroup();
}
return isTerminalState;
}
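// Samples an action of the given state: among all actions whose current bound (wrt. the optimization
// direction) is optimal, one is chosen uniformly at random.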
template<typename ValueType>
typename SparseMdpLearningModelChecker<ValueType>::ActionType SparseMdpLearningModelChecker<ValueType>::sampleActionOfState(StateType const& currentStateId, ExplorationInformation const& explorationInformation, BoundValues& bounds) const {
// Determine the values of all available actions.
std::vector<std::pair<ActionType, ValueType>> actionValues;
StateType rowGroup = explorationInformation.getRowGroup(currentStateId);
// Check for cases in which we do not need to perform more work.
if (explorationInformation.onlyOneActionAvailable(rowGroup)) {
return explorationInformation.getStartRowOfGroup(rowGroup);
}
// If there are more choices to consider, start by gathering the values of relevant actions.
STORM_LOG_TRACE("Sampling from actions leaving the state.");
for (uint32_t row = explorationInformation.getStartRowOfGroup(rowGroup); row < explorationInformation.getStartRowOfGroup(rowGroup + 1); ++row) {
actionValues.push_back(std::make_pair(row, bounds.getBoundForAction(explorationInformation.optimizationDirection, row)));
}
STORM_LOG_ASSERT(!actionValues.empty(), "Values for actions must not be empty.");
// Sort the actions wrt. the optimization direction.
if (explorationInformation.maximize()) {
std::sort(actionValues.begin(), actionValues.end(), [] (std::pair<ActionType, ValueType> const& a, std::pair<ActionType, ValueType> const& b) { return a.second > b.second; } );
} else {
std::sort(actionValues.begin(), actionValues.end(), [] (std::pair<ActionType, ValueType> const& a, std::pair<ActionType, ValueType> const& b) { return a.second < b.second; } );
}
// Determine the first elements of the sorted range that agree on their value.
auto end = actionValues.begin() + 1;
while (end != actionValues.end() && comparator.isEqual(actionValues.begin()->second, end->second)) {
++end;
}
// Now sample uniformly from all actions that attain the optimal value.
std::uniform_int_distribution<ActionType> distribution(0, std::distance(actionValues.begin(), end) - 1);
return actionValues[distribution(randomGenerator)].first;
}
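// Samples a successor state of the given action according to the transition probabilities of its matrix row.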
template<typename ValueType>
typename SparseMdpLearningModelChecker<ValueType>::StateType SparseMdpLearningModelChecker<ValueType>::sampleSuccessorFromAction(ActionType const& chosenAction, ExplorationInformation const& explorationInformation) const {
// TODO: precompute this?
std::vector<ValueType> probabilities(explorationInformation.getRowOfMatrix(chosenAction).size());
std::transform(explorationInformation.getRowOfMatrix(chosenAction).begin(), explorationInformation.getRowOfMatrix(chosenAction).end(), probabilities.begin(), [] (storm::storage::MatrixEntry<StateType, ValueType> const& entry) { return entry.getValue(); } );
// Now sample according to the probabilities.
std::discrete_distribution<StateType> distribution(probabilities.begin(), probabilities.end());
StateType offset = distribution(randomGenerator);
return explorationInformation.getRowOfMatrix(chosenAction)[offset].getColumn();
}
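// Builds a sparse matrix of the relevant (explored) part of the state space, computes its MEC decomposition
// and analyzes the detected MECs. Returns true iff at least one proper MEC (besides the one of the sink state) was found.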
template<typename ValueType>
bool SparseMdpLearningModelChecker<ValueType>::detectEndComponents(StateActionStack const& stack, ExplorationInformation& explorationInformation, BoundValues& bounds, Statistics& stats) const {
STORM_LOG_TRACE("Starting " << (explorationInformation.useLocalECDetection() ? "local" : "global") << "EC detection.");
++stats.ecDetections;
// Outline:
// 1. construct a sparse transition matrix of the relevant part of the state space.
// 2. use this matrix to compute an MEC decomposition.
// 3. if non-empty, analyze the decomposition for accepting/rejecting MECs.
// Start with 1.
storm::storage::SparseMatrixBuilder<ValueType> builder(0, 0, 0, false, true, 0);
// Determine the set of states that were expanded.
std::vector<StateType> relevantStates;
if (explorationInformation.useLocalECDetection()) {
for (auto const& stateActionPair : stack) {
if (explorationInformation.maximize() || !comparator.isOne(bounds.getLowerBoundForState(stateActionPair.first, explorationInformation))) {
relevantStates.push_back(stateActionPair.first);
}
}
std::sort(relevantStates.begin(), relevantStates.end());
auto newEnd = std::unique(relevantStates.begin(), relevantStates.end());
relevantStates.resize(std::distance(relevantStates.begin(), newEnd));
} else {
for (StateType state = 0; state < explorationInformation.stateStorage.numberOfStates; ++state) {
// Add the state to the relevant states if it has already been explored. Additionally, if we are computing
// minimal probabilities, we only consider it relevant if it's not a target state.
if (!explorationInformation.isUnexplored(state) && (explorationInformation.maximize() || !comparator.isOne(bounds.getLowerBoundForState(state, explorationInformation)))) {
relevantStates.push_back(state);
}
}
}
StateType sink = relevantStates.size();
// Create a mapping for faster look-up during the translation of the flexible matrix to the real sparse matrix.
std::unordered_map<StateType, StateType> relevantStateToNewRowGroupMapping;
for (StateType index = 0; index < relevantStates.size(); ++index) {
relevantStateToNewRowGroupMapping.emplace(relevantStates[index], index);
}
// Do the actual translation.
StateType currentRow = 0;
for (auto const& state : relevantStates) {
builder.newRowGroup(currentRow);
StateType rowGroup = explorationInformation.getRowGroup(state);
for (auto row = explorationInformation.getStartRowOfGroup(rowGroup); row < explorationInformation.getStartRowOfGroup(rowGroup + 1); ++row) {
ValueType unexpandedProbability = storm::utility::zero<ValueType>();
for (auto const& entry : explorationInformation.getRowOfMatrix(row)) {
auto it = relevantStateToNewRowGroupMapping.find(entry.getColumn());
if (it != relevantStateToNewRowGroupMapping.end()) {
// If the entry is a relevant state, we copy it over (and compensate for the offset change).
builder.addNextValue(currentRow, it->second, entry.getValue());
} else {
// If the entry is an unexpanded state, we gather the probability to later redirect it to an unexpanded sink.
unexpandedProbability += entry.getValue();
}
}
if (unexpandedProbability != storm::utility::zero<ValueType>()) {
builder.addNextValue(currentRow, sink, unexpandedProbability);
}
++currentRow;
}
}
// Then, make the sink state (representing all unexpanded states) absorbing.
builder.newRowGroup(currentRow);
builder.addNextValue(currentRow, sink, storm::utility::one<ValueType>());
STORM_LOG_TRACE("Successfully built matrix for MEC decomposition.");
// Go on to step 2.
storm::storage::SparseMatrix<ValueType> relevantStatesMatrix = builder.build();
storm::storage::MaximalEndComponentDecomposition<ValueType> mecDecomposition(relevantStatesMatrix, relevantStatesMatrix.transpose(true));
STORM_LOG_TRACE("Successfully computed MEC decomposition. Found " << (mecDecomposition.size() > 1 ? (mecDecomposition.size() - 1) : 0) << " MEC(s).");
// If the decomposition contains only the MEC consisting of the sink state, we count it as 'failed'.
if (mecDecomposition.size() <= 1) {
++stats.failedEcDetections;
// explorationInformation.increasePathLengthUntilEndComponentDetection();
return false;
} else {
stats.totalNumberOfEcDetected += mecDecomposition.size() - 1;
// 3. Analyze the MEC decomposition.
for (auto const& mec : mecDecomposition) {
// Ignore the (expected) MEC of the sink state.
if (mec.containsState(sink)) {
continue;
}
if (explorationInformation.maximize()) {
analyzeMecForMaximalProbabilities(mec, relevantStates, relevantStatesMatrix, explorationInformation, bounds);
} else {
analyzeMecForMinimalProbabilities(mec, relevantStates, relevantStatesMatrix, explorationInformation, bounds);
}
}
}
return true;
}
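// Handles one detected MEC when maximizing: if the MEC contains a target state, all of its states get lower
// bound 1; if it has no leaving action, they get upper bound 0; otherwise the MEC is collapsed into a new row
// group that only keeps the leaving actions.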
template<typename ValueType>
void SparseMdpLearningModelChecker<ValueType>::analyzeMecForMaximalProbabilities(storm::storage::MaximalEndComponent const& mec, std::vector<StateType> const& relevantStates, storm::storage::SparseMatrix<ValueType> const& relevantStatesMatrix, ExplorationInformation& explorationInformation, BoundValues& bounds) const {
// For maximal probabilities, we check (a) which MECs contain a target state, because the associated states
// have a probability of 1 (and we can therefore set their lower bounds to 1) and (b) which of the remaining
// MECs have no outgoing action, because the associated states have a probability of 0 (and we can therefore
// set their upper bounds to 0).
bool containsTargetState = false;
// Now we record all choices leaving the EC.
std::vector<ActionType> leavingActions;
for (auto const& stateAndChoices : mec) {
// Compute the state of the original model that corresponds to the current state.
StateType originalState = relevantStates[stateAndChoices.first];
StateType originalRowGroup = explorationInformation.getRowGroup(originalState);
// Check whether a target state is contained in the MEC.
if (!containsTargetState && comparator.isOne(bounds.getLowerBoundForRowGroup(originalRowGroup))) {
containsTargetState = true;
}
// For each state, compute the actions that leave the MEC.
auto includedChoicesIt = stateAndChoices.second.begin();
auto includedChoicesIte = stateAndChoices.second.end();
for (auto action = explorationInformation.getStartRowOfGroup(originalRowGroup); action < explorationInformation.getStartRowOfGroup(originalRowGroup + 1); ++action) {
if (includedChoicesIt != includedChoicesIte) {
STORM_LOG_TRACE("Next (local) choice contained in MEC is " << (*includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first]));
STORM_LOG_TRACE("Current (local) choice iterated is " << (action - explorationInformation.getStartRowOfGroup(originalRowGroup)));
if (action - explorationInformation.getStartRowOfGroup(originalRowGroup) != *includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first]) {
STORM_LOG_TRACE("Choice leaves the EC.");
leavingActions.push_back(action);
} else {
STORM_LOG_TRACE("Choice stays in the EC.");
++includedChoicesIt;
}
} else {
STORM_LOG_TRACE("Choice leaves the EC, because there is no more choice staying in the EC.");
leavingActions.push_back(action);
}
}
}
// If one of the states of the EC is a target state, all states in the EC have probability 1.
if (containsTargetState) {
STORM_LOG_TRACE("MEC contains a target state.");
for (auto const& stateAndChoices : mec) {
// Compute the state of the original model that corresponds to the current state.
StateType const& originalState = relevantStates[stateAndChoices.first];
STORM_LOG_TRACE("Setting lower bound of state in row group " << explorationInformation.getRowGroup(originalState) << " to 1.");
bounds.setLowerBoundForState(originalState, explorationInformation, storm::utility::one<ValueType>());
explorationInformation.addTerminalState(originalState);
}
} else if (leavingActions.empty()) {
STORM_LOG_TRACE("MEC's leaving choices are empty.");
// If there is no choice leaving the EC and it contains no target state, all of its states have probability 0.
for (auto const& stateAndChoices : mec) {
// Compute the state of the original model that corresponds to the current state.
StateType const& originalState = relevantStates[stateAndChoices.first];
STORM_LOG_TRACE("Setting upper bound of state in row group " << explorationInformation.getRowGroup(originalState) << " to 0.");
bounds.setUpperBoundForState(originalState, explorationInformation, storm::utility::zero<ValueType>());
explorationInformation.addTerminalState(originalState);
}
} else {
// In this case, no target state is contained in the MEC, but there are actions leaving the MEC. To
// prevent the simulation from getting stuck in this MEC again, we replace all states in the MEC by a new
// state whose outgoing actions are the ones leaving the MEC. We do this by assigning all states in the
// MEC to a new row group, which will then hold all the outgoing choices.
// Remap all contained states to the new row group.
StateType nextRowGroup = explorationInformation.getNextRowGroup();
for (auto const& stateAndChoices : mec) {
explorationInformation.assignStateToRowGroup(stateAndChoices.first, nextRowGroup);
}
bounds.initializeBoundsForNextState();
// Add to the new row group all leaving actions of contained states and set the appropriate bounds for
// the actions and the new state.
std::pair<ValueType, ValueType> stateBounds = getLowestBounds(explorationInformation.optimizationDirection);
for (auto const& action : leavingActions) {
explorationInformation.matrix.emplace_back(std::move(explorationInformation.matrix[action]));
std::pair<ValueType, ValueType> const& actionBounds = bounds.getBoundsForAction(action);
bounds.initializeBoundsForNextAction(actionBounds);
stateBounds = combineBounds(explorationInformation.optimizationDirection, stateBounds, actionBounds);
}
// Terminate the row group of the newly introduced state.
explorationInformation.rowGroupIndices.push_back(explorationInformation.matrix.size());
}
}
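// Handles one detected MEC when minimizing: such a MEC cannot contain a target state, so all of its states get
// upper bound 0 and are marked as terminal.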
template<typename ValueType>
void SparseMdpLearningModelChecker<ValueType>::analyzeMecForMinimalProbabilities(storm::storage::MaximalEndComponent const& mec, std::vector<StateType> const& relevantStates, storm::storage::SparseMatrix<ValueType> const& relevantStatesMatrix, ExplorationInformation& explorationInformation, BoundValues& bounds) const {
// For minimal probabilities, all MECs found here are guaranteed not to contain a target state. Hence, the
// probability is 0 in all associated states, and we can set their upper bounds to 0.
for (auto const& stateAndChoices : mec) {
// Compute the state of the original model that corresponds to the current state.
StateType originalState = relevantStates[stateAndChoices.first];
bounds.setUpperBoundForState(originalState, explorationInformation, storm::utility::zero<ValueType>());
explorationInformation.addTerminalState(originalState);
}
}
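// Computes the lower bound of an action as the probability-weighted sum of the lower bounds of its successor
// states; the following function does the same for the upper bounds.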
template<typename ValueType>
ValueType SparseMdpLearningModelChecker<ValueType>::computeLowerBoundOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
ValueType result = storm::utility::zero<ValueType>();
for (auto const& element : explorationInformation.getRowOfMatrix(action)) {
result += element.getValue() * bounds.getLowerBoundForState(element.getColumn(), explorationInformation);
}
return result;
}
template<typename ValueType>
ValueType SparseMdpLearningModelChecker<ValueType>::computeUpperBoundOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
ValueType result = storm::utility::zero<ValueType>();
for (auto const& element : explorationInformation.getRowOfMatrix(action)) {
result += element.getValue() * bounds.getUpperBoundForState(element.getColumn(), explorationInformation);
}
return result;
}
template<typename ValueType>
std::pair<ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::computeBoundsOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
// TODO: take into account self-loops?
std::pair<ValueType, ValueType> result = std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>());
for (auto const& element : explorationInformation.getRowOfMatrix(action)) {
result.first += element.getValue() * bounds.getLowerBoundForState(element.getColumn(), explorationInformation);
result.second += element.getValue() * bounds.getUpperBoundForState(element.getColumn(), explorationInformation);
}
return result;
}
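// Computes the bounds of a state by combining the bounds of all of its actions according to the optimization
// direction.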
template<typename ValueType>
std::pair<ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::computeBoundsOfState(StateType const& currentStateId, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
StateType group = explorationInformation.getRowGroup(currentStateId);
std::pair<ValueType, ValueType> result = getLowestBounds(explorationInformation.optimizationDirection);
for (ActionType action = explorationInformation.getStartRowOfGroup(group); action < explorationInformation.getStartRowOfGroup(group + 1); ++action) {
std::pair<ValueType, ValueType> actionValues = computeBoundsOfAction(action, explorationInformation, bounds);
result = combineBounds(explorationInformation.optimizationDirection, result, actionValues);
}
return result;
}
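// Propagates the bound updates backwards along the sampled path: the terminal state on top of the stack is
// dropped and the bounds of every (state, action) pair on the path are recomputed from back to front.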
template<typename ValueType>
void SparseMdpLearningModelChecker<ValueType>::updateProbabilityBoundsAlongSampledPath(StateActionStack& stack, ExplorationInformation const& explorationInformation, BoundValues& bounds) const {
stack.pop_back();
while (!stack.empty()) {
updateProbabilityOfAction(stack.back().first, stack.back().second, explorationInformation, bounds);
stack.pop_back();
}
}
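// Recomputes the bounds of the given action and uses them to tighten the bounds of the corresponding state.
// When maximizing, the lower bound of the state can be raised directly, whereas the upper bound may only be
// lowered after also considering the upper bounds of the other actions (and vice versa when minimizing).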
template<typename ValueType>
void SparseMdpLearningModelChecker<ValueType>::updateProbabilityOfAction(StateType const& state, ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues& bounds) const {
// Compute the new lower/upper values of the action.
std::pair<ValueType, ValueType> newBoundsForAction = computeBoundsOfAction(action, explorationInformation, bounds);
// And set them as the current value.
bounds.setBoundsForAction(action, newBoundsForAction);
// Check if we need to update the values for the states.
if (explorationInformation.maximize()) {
bounds.setLowerBoundOfStateIfGreaterThanOld(state, explorationInformation, newBoundsForAction.first);
StateType rowGroup = explorationInformation.getRowGroup(state);
if (newBoundsForAction.second < bounds.getUpperBoundForRowGroup(rowGroup)) {
if (explorationInformation.getRowGroupSize(rowGroup) > 1) {
newBoundsForAction.second = std::max(newBoundsForAction.second, computeBoundOverAllOtherActions(storm::OptimizationDirection::Maximize, state, action, explorationInformation, bounds));
}
bounds.setUpperBoundForRowGroup(rowGroup, newBoundsForAction.second);
}
} else {
bounds.setUpperBoundOfStateIfLessThanOld(state, explorationInformation, newBoundsForAction.second);
StateType rowGroup = explorationInformation.getRowGroup(state);
if (bounds.getLowerBoundForRowGroup(rowGroup) < newBoundsForAction.first) {
if (explorationInformation.getRowGroupSize(rowGroup) > 1) {
ValueType min = computeBoundOverAllOtherActions(storm::OptimizationDirection::Minimize, state, action, explorationInformation, bounds);
newBoundsForAction.first = std::min(newBoundsForAction.first, min);
}
bounds.setLowerBoundForRowGroup(rowGroup, newBoundsForAction.first);
}
}
}
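// Computes the bound the state achieves over all of its actions except the given one: the maximal upper bound
// when maximizing, the minimal lower bound when minimizing.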
template<typename ValueType>
ValueType SparseMdpLearningModelChecker<ValueType>::computeBoundOverAllOtherActions(storm::OptimizationDirection const& direction, StateType const& state, ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
ValueType bound = getLowestBound(explorationInformation.optimizationDirection);
ActionType group = explorationInformation.getRowGroup(state);
for (auto currentAction = explorationInformation.getStartRowOfGroup(group); currentAction < explorationInformation.getStartRowOfGroup(group + 1); ++currentAction) {
if (currentAction == action) {
continue;
}
if (direction == storm::OptimizationDirection::Maximize) {
bound = std::max(bound, computeUpperBoundOfAction(currentAction, explorationInformation, bounds));
} else {
bound = std::min(bound, computeLowerBoundOfAction(currentAction, explorationInformation, bounds));
}
}
return bound;
}
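// Returns the lowest (most conservative initial) bounds wrt. the given optimization direction: 0 when
// maximizing, 1 when minimizing.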
template<typename ValueType>
std::pair<ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::getLowestBounds(storm::OptimizationDirection const& direction) const {
ValueType val = getLowestBound(direction);
return std::make_pair(val, val);
}
template<typename ValueType>
ValueType SparseMdpLearningModelChecker<ValueType>::getLowestBound(storm::OptimizationDirection const& direction) const {
if (direction == storm::OptimizationDirection::Maximize) {
return storm::utility::zero<ValueType>();
} else {
return storm::utility::one<ValueType>();
}
}
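// Combines two bound pairs component-wise: the maximum when maximizing, the minimum when minimizing.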
template<typename ValueType>
std::pair<ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::combineBounds(storm::OptimizationDirection const& direction, std::pair<ValueType, ValueType> const& bounds1, std::pair<ValueType, ValueType> const& bounds2) const {
if (direction == storm::OptimizationDirection::Maximize) {
return std::make_pair(std::max(bounds1.first, bounds2.first), std::max(bounds1.second, bounds2.second));
} else {
return std::make_pair(std::min(bounds1.first, bounds2.first), std::min(bounds1.second, bounds2.second));
}
}
template class SparseMdpLearningModelChecker<double>;
}
}