From 509243532932ae81b6c2851de4826a1c1afd0b37 Mon Sep 17 00:00:00 2001 From: dehnert Date: Tue, 5 Apr 2016 18:25:47 +0200 Subject: [PATCH] started refactoring learning model checker Former-commit-id: b9e6015ae40a7a5425f1a5d6c725bf14471898ee --- .../SparseMdpLearningModelChecker.cpp | 852 ++++++++---------- .../SparseMdpLearningModelChecker.h | 274 +++++- 2 files changed, 621 insertions(+), 505 deletions(-) diff --git a/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp b/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp index 72c7178a0..67d3c6476 100644 --- a/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp +++ b/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp @@ -1,11 +1,8 @@ #include "src/modelchecker/reachability/SparseMdpLearningModelChecker.h" #include "src/storage/SparseMatrix.h" -#include "src/storage/sparse/StateStorage.h" #include "src/storage/MaximalEndComponentDecomposition.h" -#include "src/generator/PrismNextStateGenerator.h" - #include "src/logic/FragmentSpecification.h" #include "src/utility/prism.h" @@ -18,6 +15,7 @@ #include "src/settings/modules/GeneralSettings.h" #include "src/utility/macros.h" +#include "src/exceptions/InvalidPropertyException.h" #include "src/exceptions/NotSupportedException.h" namespace storm { @@ -36,239 +34,297 @@ namespace storm { } template - void SparseMdpLearningModelChecker::updateProbabilities(StateType const& sourceStateId, uint32_t action, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, StateType const& unexploredMarker) const { - // Find out which row of the matrix we have to consider for the given action. - StateType sourceRow = action; - - // Compute the new lower/upper values of the action. - ValueType newLowerValue = storm::utility::zero(); - ValueType newUpperValue = storm::utility::zero(); - -// boost::optional loopProbability; - for (auto const& element : transitionMatrix[sourceRow]) { - // If the element is a self-loop, we treat the probability by a proper rescaling after the loop. -// if (element.getColumn() == sourceStateId) { -// STORM_LOG_TRACE("Found self-loop with probability " << element.getValue() << "."); -// loopProbability = element.getValue(); -// continue; -// } - - std::cout << "lower += " << element.getValue() << " * state[" << element.getColumn() << "] = " << (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::zero() : lowerBoundsPerState[stateToRowGroupMapping[element.getColumn()]]) << std::endl; - if (stateToRowGroupMapping[element.getColumn()] != unexploredMarker) { - std::cout << "upper bounds per state @ " << stateToRowGroupMapping[element.getColumn()] << " is " << upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]] << std::endl; - } - std::cout << "upper += " << element.getValue() << " * state[" << element.getColumn() << "] = " << (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]) << std::endl; - newLowerValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::zero() : lowerBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); - newUpperValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); - std::cout << "after iter " << newLowerValue << " and " << newUpperValue << std::endl; - } + std::unique_ptr SparseMdpLearningModelChecker::computeReachabilityProbabilities(CheckTask const& checkTask) { + storm::logic::EventuallyFormula const& eventuallyFormula = checkTask.getFormula(); + storm::logic::Formula const& subformula = eventuallyFormula.getSubformula(); + STORM_LOG_THROW(program.isDeterministicModel() || checkTask.isOptimizationDirectionSet(), storm::exceptions::InvalidPropertyException, "For nondeterministic systems, an optimization direction (min/max) must be given in the property."); + STORM_LOG_THROW(subformula.isAtomicExpressionFormula() || subformula.isAtomicLabelFormula(), storm::exceptions::NotSupportedException, "Learning engine can only deal with formulas of the form 'F \"label\"' or 'F expression'."); - // If there was a self-loop with probability p, we scale the probabilities by 1/(1-p). -// if (loopProbability) { -// STORM_LOG_TRACE("Scaling values " << newLowerValue << " and " << newUpperValue << " with " << (storm::utility::one()/(storm::utility::one() - loopProbability.get())) << "."); -// newLowerValue = newLowerValue / (storm::utility::one() - loopProbability.get()); -// newUpperValue = newUpperValue / (storm::utility::one() - loopProbability.get()); -// } + StateGeneration stateGeneration(storm::generator::PrismNextStateGenerator(program, variableInformation, false), getTargetStateExpression(subformula)); - // And set them as the current value. - std::cout << newLowerValue << " vs " << newUpperValue << std::endl; - STORM_LOG_ASSERT(newLowerValue <= newUpperValue, "Lower bound must always be smaller or equal than upper bound."); - STORM_LOG_TRACE("Updating lower value of action " << action << " of state " << sourceStateId << " to " << newLowerValue << " (was " << lowerBoundsPerAction[action] << ")."); - lowerBoundsPerAction[action] = newLowerValue; - STORM_LOG_TRACE("Updating upper value of action " << action << " of state " << sourceStateId << " to " << newUpperValue << " (was " << upperBoundsPerAction[action] << ")."); - upperBoundsPerAction[action] = newUpperValue; + ExplorationInformation explorationInformation(variableInformation.getTotalBitOffset(true)); + explorationInformation.optimizationDirection = checkTask.isOptimizationDirectionSet() ? checkTask.getOptimizationDirection() : storm::OptimizationDirection::Maximize; - // Check if we need to update the values for the states. - if (lowerBoundsPerState[stateToRowGroupMapping[sourceStateId]] < newLowerValue) { - STORM_LOG_TRACE("Got new lower bound for state " << sourceStateId << ": " << newLowerValue << " (was " << lowerBoundsPerState[stateToRowGroupMapping[sourceStateId]] << ")."); - lowerBoundsPerState[stateToRowGroupMapping[sourceStateId]] = newLowerValue; - } - - uint32_t sourceRowGroup = stateToRowGroupMapping[sourceStateId]; - if (newUpperValue < upperBoundsPerState[sourceRowGroup]) { - if (rowGroupIndices[sourceRowGroup + 1] - rowGroupIndices[sourceRowGroup] > 1) { - ValueType max = storm::utility::zero(); - - for (uint32_t currentAction = rowGroupIndices[sourceRowGroup]; currentAction < rowGroupIndices[sourceRowGroup + 1]; ++currentAction) { - std::cout << "cur: " << currentAction << std::endl; - if (currentAction == action) { - continue; - } - - ValueType currentValue = storm::utility::zero(); - for (auto const& element : transitionMatrix[currentAction]) { - currentValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); - } - max = std::max(max, currentValue); - std::cout << "max is " << max << std::endl; - } - - newUpperValue = std::max(newUpperValue, max); - } - - if (newUpperValue < upperBoundsPerState[sourceRowGroup]) { - STORM_LOG_TRACE("Got new upper bound for state " << sourceStateId << ": " << newUpperValue << " (was " << upperBoundsPerState[sourceRowGroup] << ")."); - std::cout << "writing at index " << sourceRowGroup << std::endl; - upperBoundsPerState[sourceRowGroup] = newUpperValue; - } - } - } - - template - void SparseMdpLearningModelChecker::updateProbabilitiesUsingStack(std::vector>& stateActionStack, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, StateType const& unexploredMarker) const { + // The first row group starts at action 0. + explorationInformation.newRowGroup(0); - std::cout << "stack is:" << std::endl; - for (auto const& el : stateActionStack) { - std::cout << el.first << "-[" << el.second << "]-> "; - } - std::cout << std::endl; + // Create a callback for the next-state generator to enable it to request the index of states. + std::function stateToIdCallback = createStateToIdCallback(explorationInformation); - stateActionStack.pop_back(); - while (!stateActionStack.empty()) { - updateProbabilities(stateActionStack.back().first, stateActionStack.back().second, transitionMatrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); - stateActionStack.pop_back(); - } + // Compute and return result. + std::tuple boundsForInitialState = performLearningProcedure(stateGeneration, explorationInformation); + return std::make_unique>(std::get<0>(boundsForInitialState), std::get<1>(boundsForInitialState)); } template - std::pair SparseMdpLearningModelChecker::computeValuesOfChoice(uint32_t action, std::vector>> const& transitionMatrix, std::vector const& stateToRowGroupMapping, std::vector const& lowerBoundsPerState, std::vector const& upperBoundsPerState, StateType const& unexploredMarker) { - std::pair result = std::make_pair(storm::utility::zero(), storm::utility::zero()); - for (auto const& element : transitionMatrix[action]) { - result.first += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::zero() : lowerBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); - result.second += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + storm::expressions::Expression SparseMdpLearningModelChecker::getTargetStateExpression(storm::logic::Formula const& subformula) const { + storm::expressions::Expression result; + if (subformula.isAtomicExpressionFormula()) { + result = subformula.asAtomicExpressionFormula().getExpression(); + } else { + result = program.getLabelExpression(subformula.asAtomicLabelFormula().getLabel()); } return result; } template - std::pair SparseMdpLearningModelChecker::computeValuesOfState(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector const& lowerBounds, std::vector const& upperBounds, std::vector const& lowerBoundsPerState, std::vector const& upperBoundsPerState, StateType const& unexploredMarker) { - StateType sourceRowGroup = stateToRowGroupMapping[currentStateId]; - std::pair result = std::make_pair(storm::utility::zero(), storm::utility::zero()); - for (uint32_t choice = rowGroupIndices[sourceRowGroup]; choice < rowGroupIndices[sourceRowGroup + 1]; ++choice) { - std::pair choiceValues = computeValuesOfChoice(choice, transitionMatrix, stateToRowGroupMapping, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); - result.first = std::max(choiceValues.first, result.first); - result.second = std::max(choiceValues.second, result.second); - } - return result; + std::function::StateType (storm::generator::CompressedState const&)> SparseMdpLearningModelChecker::createStateToIdCallback(ExplorationInformation& explorationInformation) const { + return [&explorationInformation] (storm::generator::CompressedState const& state) -> StateType { + StateType newIndex = explorationInformation.stateStorage.numberOfStates; + + // Check, if the state was already registered. + std::pair actualIndexBucketPair = explorationInformation.stateStorage.stateToId.findOrAddAndGetBucket(state, newIndex); + + if (actualIndexBucketPair.first == newIndex) { + explorationInformation.addUnexploredState(state); + } + + return actualIndexBucketPair.first; + }; } template - uint32_t SparseMdpLearningModelChecker::sampleFromMaxActions(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector const& upperBoundsPerState, std::unordered_map const& stateToLeavingChoicesOfEndComponent, StateType const& unexploredMarker) { + std::tuple::StateType, ValueType, ValueType> SparseMdpLearningModelChecker::performLearningProcedure(StateGeneration& stateGeneration, ExplorationInformation& explorationInformation) const { - StateType rowGroup = stateToRowGroupMapping[currentStateId]; + // Generate the initial state so we know where to start the simulation. + explorationInformation.setInitialStates(stateGeneration.getInitialStates()); + STORM_LOG_THROW(explorationInformation.getNumberOfInitialStates() == 1, storm::exceptions::NotSupportedException, "Currently only models with one initial state are supported by the learning engine."); + StateType initialStateIndex = explorationInformation.getFirstInitialState(); - // First, determine all maximizing actions. - std::vector allMaxActions; + // Create a structure that holds the bounds for the states and actions. + BoundValues bounds; + + // Create a stack that is used to track the path we sampled. + StateActionStack stack; - for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) { - if (stateToRowGroupMapping[state] != unexploredMarker) { - std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [x, " << upperBoundsPerState[stateToRowGroupMapping[state]] << "]" << std::endl; + // Now perform the actual sampling. + Statistics stats; + bool convergenceCriterionMet = false; + while (!convergenceCriterionMet) { + bool result = samplePathFromState(stateGeneration, explorationInformation, stack, bounds, stats); + + // If a terminal state was found, we update the probabilities along the path contained in the stack. + if (result) { + // Update the bounds along the path to the terminal state. + STORM_LOG_TRACE("Found terminal state, updating probabilities along path."); + updateProbabilityBoundsAlongSampledPath(stack, explorationInformation, bounds); } else { - std::cout << "state " << state << " is unexplored" << std::endl; + // If not terminal state was found, the search aborted, possibly because of an EC-detection. In this + // case, we cannot update the probabilities. + STORM_LOG_TRACE("Did not find terminal state."); } + + STORM_LOG_DEBUG("Discovered states: " << explorationInformation.getNumberOfDiscoveredStates() << " (" << stats.numberOfExploredStates << "explored, " << explorationInformation.getNumberOfUnexploredStates() << " unexplored)."); + STORM_LOG_DEBUG("Value of initial state is in [" << bounds.getLowerBoundForState(initialStateIndex, explorationInformation) << ", " << bounds.getUpperBoundForState(initialStateIndex, explorationInformation) << "]."); + ValueType difference = bounds.getDifferenceOfStateBounds(initialStateIndex, explorationInformation); + STORM_LOG_DEBUG("Difference after iteration " << stats.iterations << " is " << difference << "."); + convergenceCriterionMet = difference < 1e-6; + + ++stats.iterations; } - // Determine the maximal value of any action. - ValueType max = 0; - auto choicesInEcIt = stateToLeavingChoicesOfEndComponent.find(currentStateId); - if (choicesInEcIt != stateToLeavingChoicesOfEndComponent.end()) { - STORM_LOG_TRACE("Sampling from actions leaving the previously detected EC."); - for (auto const& row : *choicesInEcIt->second) { - ValueType current = 0; - for (auto const& element : transitionMatrix[row]) { - current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + if (storm::settings::generalSettings().isShowStatisticsSet()) { + std::cout << std::endl << "Learning summary -------------------------" << std::endl; + std::cout << "Discovered states: " << explorationInformation.getNumberOfDiscoveredStates() << " (" << stats.numberOfExploredStates << "explored, " << explorationInformation.getNumberOfUnexploredStates() << " unexplored, " << stats.numberOfTargetStates << " target states)" << std::endl; + std::cout << "Sampling iterations: " << stats.iterations << std::endl; + std::cout << "Maximal path length: " << stats.maxPathLength << std::endl; + } + + return std::make_tuple(initialStateIndex, bounds.getLowerBoundForState(initialStateIndex, explorationInformation), bounds.getUpperBoundForState(initialStateIndex, explorationInformation)); + } + + template + bool SparseMdpLearningModelChecker::samplePathFromState(StateGeneration& stateGeneration, ExplorationInformation& explorationInformation, StateActionStack& stack, BoundValues& bounds, Statistics& stats) const { + + // Start the search from the initial state. + stack.push_back(std::make_pair(explorationInformation.getFirstInitialState(), 0)); + + bool foundTerminalState = false; + while (!foundTerminalState) { + StateType const& currentStateId = stack.back().first; + STORM_LOG_TRACE("State on top of stack is: " << currentStateId << "."); + + // If the state is not yet explored, we need to retrieve its behaviors. + auto unexploredIt = explorationInformation.unexploredStates.find(currentStateId); + if (unexploredIt != explorationInformation.unexploredStates.end()) { + STORM_LOG_TRACE("State was not yet explored."); + + // Explore the previously unexplored state. + storm::generator::CompressedState const& compressedState = unexploredIt->second; + foundTerminalState = exploreState(stateGeneration, currentStateId, compressedState, explorationInformation, bounds, stats); + explorationInformation.unexploredStates.erase(unexploredIt); + } else { + // If the state was already explored, we check whether it is a terminal state or not. + if (explorationInformation.isTerminal(currentStateId)) { + STORM_LOG_TRACE("Found already explored terminal state: " << currentStateId << "."); + foundTerminalState = true; } + } + + // If the state was not a terminal state, we continue the path search and sample the next state. + if (!foundTerminalState) { + // At this point, we can be sure that the state was expanded and that we can sample according to the + // probabilities in the matrix. + uint32_t chosenAction = sampleFromMaxActions(currentStateId, explorationInformation, bounds); + stack.back().second = chosenAction; + STORM_LOG_TRACE("Sampled action " << chosenAction << " in state " << currentStateId << "."); + + StateType successor = sampleSuccessorFromAction(chosenAction, explorationInformation); + STORM_LOG_TRACE("Sampled successor " << successor << " according to action " << chosenAction << " of state " << currentStateId << "."); + + // Put the successor state and a dummy action on top of the stack. + stack.emplace_back(successor, 0); + stats.maxPathLength = std::max(stats.maxPathLength, stack.size()); - max = std::max(max, current); + // If the current path length exceeds the threshold and the model is a nondeterministic one, we + // perform an EC detection. + if (stack.size() > stats.pathLengthUntilEndComponentDetection && !program.isDeterministicModel()) { + detectEndComponents(stack, explorationInformation, bounds); + + // Abort the current search. + STORM_LOG_TRACE("Aborting the search after EC detection."); + stack.clear(); + break; + } } + } + + return foundTerminalState; + } + + template + bool SparseMdpLearningModelChecker::exploreState(StateGeneration& stateGeneration, StateType const& currentStateId, storm::generator::CompressedState const& currentState, ExplorationInformation& explorationInformation, BoundValues& bounds, Statistics& stats) const { + + bool isTerminalState = false; + bool isTargetState = false; + + // Before generating the behavior of the state, we need to determine whether it's a target state that + // does not need to be expanded. + stateGeneration.generator.load(currentState); + if (stateGeneration.isTargetState()) { + ++stats.numberOfTargetStates; + isTargetState = true; + isTerminalState = true; } else { - STORM_LOG_TRACE("Sampling from actions leaving the state."); - for (uint32_t row = rowGroupIndices[rowGroup]; row < rowGroupIndices[rowGroup + 1]; ++row) { - ValueType current = 0; - for (auto const& element : transitionMatrix[row]) { - current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + STORM_LOG_TRACE("Exploring state."); + + // If it needs to be expanded, we use the generator to retrieve the behavior of the new state. + storm::generator::StateBehavior behavior = stateGeneration.expand(); + STORM_LOG_TRACE("State has " << behavior.getNumberOfChoices() << " choices."); + + // Clumsily check whether we have found a state that forms a trivial BMEC. + if (behavior.getNumberOfChoices() == 0) { + isTerminalState = true; + } else if (behavior.getNumberOfChoices() == 1) { + auto const& onlyChoice = *behavior.begin(); + if (onlyChoice.size() == 1) { + auto const& onlyEntry = *onlyChoice.begin(); + if (onlyEntry.first == currentStateId) { + isTerminalState = true; + } } - max = std::max(max, current); } - } - - STORM_LOG_TRACE("Looking for action with value " << max << "."); + + // If the state was neither a trivial (non-accepting) terminal state nor a target state, we + // need to store its behavior. + if (!isTerminalState) { + // Next, we insert the behavior into our matrix structure. + StateType startRow = explorationInformation.matrix.size(); + explorationInformation.addRowsToMatrix(behavior.getNumberOfChoices()); + + // Terminate the row group. + explorationInformation.rowGroupIndices.push_back(explorationInformation.matrix.size()); + + ActionType currentAction = 0; + for (auto const& choice : behavior) { + for (auto const& entry : choice) { + std::cout << "adding " << currentStateId << " -> " << entry.first << " with prob " << entry.second << std::endl; + explorationInformation.matrix[startRow + currentAction].emplace_back(entry.first, entry.second); + } + + bounds.initializeActionBoundsForNextAction(computeBoundsOfAction(startRow + currentAction, explorationInformation, bounds)); - for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) { - if (stateToRowGroupMapping[state] != unexploredMarker) { - std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [x, " << upperBoundsPerState[stateToRowGroupMapping[state]] << "]" << std::endl; + STORM_LOG_TRACE("Initializing bounds of action " << (startRow + currentAction) << " to " << bounds.getLowerBoundForAction(startRow + currentAction) << " and " << bounds.getUpperBoundForAction(startRow + currentAction) << "."); + + ++currentAction; + } + + bounds.initializeStateBoundsForNextState(computeBoundsOfState(currentStateId, explorationInformation, bounds)); + STORM_LOG_TRACE("Initializing bounds of state " << currentStateId << " to " << bounds.getLowerBoundForState(currentStateId) << " and " << bounds.getUpperBoundForState(currentStateId) << "."); + } + } + + if (isTerminalState) { + STORM_LOG_TRACE("State does not need to be explored, because it is " << (isTargetState ? "a target state" : "a rejecting terminal state") << "."); + explorationInformation.addTerminalState(currentStateId); + + if (isTargetState) { + bounds.initializeStateBoundsForNextState(std::make_pair(storm::utility::one(), storm::utility::one())); + bounds.initializeStateBoundsForNextAction(std::make_pair(storm::utility::one(), storm::utility::one())); } else { - std::cout << "state " << state << " is unexplored" << std::endl; + bounds.initializeStateBoundsForNextState(std::make_pair(storm::utility::zero(), storm::utility::zero())); + bounds.initializeStateBoundsForNextAction(std::make_pair(storm::utility::zero(), storm::utility::zero())); } + + // Increase the size of the matrix, but leave the row empty. + explorationInformation.addRowsToMatrix(1); + + // Terminate the row group. + explorationInformation.newRowGroup(); } - if (choicesInEcIt != stateToLeavingChoicesOfEndComponent.end()) { + // Finally, map the unexplored state to the row group. + explorationInformation.assignStateToNextRowGroup(currentStateId); + STORM_LOG_TRACE("Assigning row group " << explorationInformation.getRowGroup(currentStateId) << " to state " << currentStateId << "."); + + return isTerminalState; + } + + template + uint32_t SparseMdpLearningModelChecker::sampleMaxAction(StateType const& currentStateId, ExplorationInformation const& explorationInformation, BoundValues& bounds) const { + StateType rowGroup = explorationInformation.getRowGroup(currentStateId); + + // First, determine all maximizing actions. + std::vector allMaxActions; + + // Determine the values of all available actions. + std::vector> actionValues; + auto choicesInEcIt = explorationInformation.stateToLeavingChoicesOfEndComponent.find(currentStateId); + if (choicesInEcIt != explorationInformation.stateToLeavingChoicesOfEndComponent.end()) { + STORM_LOG_TRACE("Sampling from actions leaving the previously detected EC."); for (auto const& row : *choicesInEcIt->second) { - ValueType current = 0; - for (auto const& element : transitionMatrix[row]) { - current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); - } - STORM_LOG_TRACE("Computed (upper) bound " << current << " for row " << row << "."); - - // If the action is one of the maximizing ones, insert it into our list. - if (comparator.isEqual(current, max)) { - allMaxActions.push_back(row); - } + actionValues.push_back(std::make_pair(row, computeUpperBoundOfAction(row, explorationInformation, bounds))); } } else { - for (uint32_t row = rowGroupIndices[rowGroup]; row < rowGroupIndices[rowGroup + 1]; ++row) { - ValueType current = 0; - for (auto const& element : transitionMatrix[row]) { - if (stateToRowGroupMapping[element.getColumn()] != unexploredMarker) { - std::cout << "upper bounds per state @ " << stateToRowGroupMapping[element.getColumn()] << " is " << upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]] << std::endl; - } - std::cout << "+= " << element.getValue() << " * " << "state[" << element.getColumn() << "] = " << (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]) << std::endl; - current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); - } - STORM_LOG_TRACE("Computed (upper) bound " << current << " for row " << row << "."); - - // If the action is one of the maximizing ones, insert it into our list. - if (comparator.isEqual(current, max)) { - allMaxActions.push_back(row); - } + STORM_LOG_TRACE("Sampling from actions leaving the state."); + + for (uint32_t row = explorationInformation.getStartRowOfGroup(rowGroup); row < explorationInformation.getStartRowOfGroup(rowGroup + 1); ++row) { + actionValues.push_back(std::make_pair(row, computeUpperBoundOfAction(row, explorationInformation, bounds))); } } - STORM_LOG_ASSERT(!allMaxActions.empty(), "Must have at least one maximizing action."); + std::sort(actionValues.begin(), actionValues.end(), [] (std::pair const& a, std::pair const& b) { return b.second > a.second; } ); + auto end = std::equal_range(actionValues.begin(), actionValues.end(), [this] (std::pair const& a, std::pair const& b) { return comparator.isEqual(a.second, b.second); } ); // Now sample from all maximizing actions. - std::uniform_int_distribution distribution(0, allMaxActions.size() - 1); - return allMaxActions[distribution(randomGenerator)]; + std::uniform_int_distribution distribution(0, std::distance(actionValues.begin(), end)); + return actionValues[distribution(randomGenerator)].first; } template - typename SparseMdpLearningModelChecker::StateType SparseMdpLearningModelChecker::sampleSuccessorFromAction(StateType chosenAction, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping) { - uint32_t row = chosenAction; - + typename SparseMdpLearningModelChecker::StateType SparseMdpLearningModelChecker::sampleSuccessorFromAction(ActionType const& chosenAction, ExplorationInformation const& explorationInformation) const { // TODO: precompute this? - std::vector probabilities(transitionMatrix[row].size()); - std::transform(transitionMatrix[row].begin(), transitionMatrix[row].end(), probabilities.begin(), [] (storm::storage::MatrixEntry const& entry) { return entry.getValue(); } ); + std::vector probabilities(explorationInformation.getRowOfMatrix(chosenAction).size()); + std::transform(explorationInformation.getRowOfMatrix(chosenAction).begin(), explorationInformation.getRowOfMatrix(chosenAction).end(), probabilities.begin(), [] (storm::storage::MatrixEntry const& entry) { return entry.getValue(); } ); // Now sample according to the probabilities. std::discrete_distribution distribution(probabilities.begin(), probabilities.end()); StateType offset = distribution(randomGenerator); - return transitionMatrix[row][offset].getColumn(); + return explorationInformation.getRowOfMatrix(chosenAction)[offset].getColumn(); } template - storm::expressions::Expression SparseMdpLearningModelChecker::getTargetStateExpression(storm::logic::Formula const& subformula) { - storm::expressions::Expression result; - if (subformula.isAtomicExpressionFormula()) { - result = subformula.asAtomicExpressionFormula().getExpression(); - } else { - result = program.getLabelExpression(subformula.asAtomicLabelFormula().getLabel()); - } - return result; - } - - template - void SparseMdpLearningModelChecker::detectEndComponents(std::vector> const& stateActionStack, boost::container::flat_set& terminalStates, std::vector>>& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, std::unordered_map& stateToLeavingChoicesOfEndComponent, StateType const& unexploredMarker) const { - + void SparseMdpLearningModelChecker::detectEndComponents(StateActionStack const& stack, ExplorationInformation& explorationInformation, BoundValues& bounds) const { STORM_LOG_TRACE("Starting EC detection."); // Outline: @@ -281,13 +337,14 @@ namespace storm { // Determine the set of states that was expanded. std::vector relevantStates; - for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) { - if (stateToRowGroupMapping[state] != unexploredMarker) { + for (StateType state = 0; state < explorationInformation.stateStorage.numberOfStates; ++state) { + if (!explorationInformation.isUnexplored(state)) { relevantStates.push_back(state); } } + // Sort according to the actual row groups so we can insert the elements in order later. - std::sort(relevantStates.begin(), relevantStates.end(), [&stateToRowGroupMapping] (StateType const& a, StateType const& b) { return stateToRowGroupMapping[a] < stateToRowGroupMapping[b]; }); + std::sort(relevantStates.begin(), relevantStates.end(), [&explorationInformation] (StateType const& a, StateType const& b) { return explorationInformation.getRowGroup(a) < explorationInformation.getRowGroup(b); }); StateType unexploredState = relevantStates.size(); // Create a mapping for faster look-up during the translation of flexible matrix to the real sparse matrix. @@ -301,10 +358,10 @@ namespace storm { StateType currentRow = 0; for (auto const& state : relevantStates) { builder.newRowGroup(currentRow); - StateType rowGroup = stateToRowGroupMapping[state]; - for (auto row = rowGroupIndices[rowGroup]; row < rowGroupIndices[rowGroup + 1]; ++row) { + StateType rowGroup = explorationInformation.getRowGroup(state); + for (auto row = explorationInformation.getStartRowOfGroup(rowGroup); row < explorationInformation.getStartRowOfGroup(rowGroup + 1); ++row) { ValueType unexpandedProbability = storm::utility::zero(); - for (auto const& entry : transitionMatrix[row]) { + for (auto const& entry : explorationInformation.getRowOfMatrix(row)) { auto it = relevantStateToNewRowGroupMapping.find(entry.getColumn()); if (it != relevantStateToNewRowGroupMapping.end()) { // If the entry is a relevant state, we copy it over (and compensate for the offset change). @@ -340,41 +397,41 @@ namespace storm { bool containsTargetState = false; // Now we record all choices leaving the EC. - ChoiceSetPointer leavingChoices = std::make_shared(); + ActionSetPointer leavingChoices = std::make_shared(); for (auto const& stateAndChoices : mec) { // Compute the state of the original model that corresponds to the current state. std::cout << "local state: " << stateAndChoices.first << std::endl; StateType originalState = relevantStates[stateAndChoices.first]; std::cout << "original state: " << originalState << std::endl; - uint32_t originalRowGroup = stateToRowGroupMapping[originalState]; + uint32_t originalRowGroup = explorationInformation.getRowGroup(originalState); std::cout << "original row group: " << originalRowGroup << std::endl; - // TODO: This check for a target state is a bit hackish and only works for max probabilities. - if (!containsTargetState && lowerBoundsPerState[originalRowGroup] == storm::utility::one()) { + // TODO: This checks for a target state is a bit hackish and only works for max probabilities. + if (!containsTargetState && comparator.isOne(bounds.getLowerBoundForRowGroup(originalRowGroup, explorationInformation))) { containsTargetState = true; } auto includedChoicesIt = stateAndChoices.second.begin(); auto includedChoicesIte = stateAndChoices.second.end(); - for (auto choice = rowGroupIndices[originalRowGroup]; choice < rowGroupIndices[originalRowGroup + 1]; ++choice) { + for (auto action = explorationInformation.getStartRowOfGroup(originalRowGroup); action < explorationInformation.getStartRowOfGroup(originalRowGroup + 1); ++action) { if (includedChoicesIt != includedChoicesIte) { STORM_LOG_TRACE("Next (local) choice contained in MEC is " << (*includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first])); - STORM_LOG_TRACE("Current (local) choice iterated is " << (choice - rowGroupIndices[originalRowGroup])); - if (choice - rowGroupIndices[originalRowGroup] != *includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first]) { + STORM_LOG_TRACE("Current (local) choice iterated is " << (action - explorationInformation.getStartRowOfGroup(originalRowGroup))); + if (action - explorationInformation.getStartRowOfGroup(originalRowGroup) != *includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first]) { STORM_LOG_TRACE("Choice leaves the EC."); - leavingChoices->insert(choice); + leavingChoices->insert(action); } else { STORM_LOG_TRACE("Choice stays in the EC."); ++includedChoicesIt; } } else { STORM_LOG_TRACE("Choice leaves the EC, because there is no more choice staying in the EC."); - leavingChoices->insert(choice); + leavingChoices->insert(action); } } - stateToLeavingChoicesOfEndComponent[originalState] = leavingChoices; + explorationInformation.stateToLeavingChoicesOfEndComponent[originalState] = leavingChoices; } // If one of the states of the EC is a target state, all states in the EC have probability 1. @@ -383,10 +440,10 @@ namespace storm { for (auto const& stateAndChoices : mec) { // Compute the state of the original model that corresponds to the current state. StateType originalState = relevantStates[stateAndChoices.first]; - - STORM_LOG_TRACE("Setting lower bound of state in row group " << stateToRowGroupMapping[originalState] << " to 1."); - lowerBoundsPerState[stateToRowGroupMapping[originalState]] = storm::utility::one(); - terminalStates.insert(originalState); + + STORM_LOG_TRACE("Setting lower bound of state in row group " << explorationInformation.getRowGroup(originalState) << " to 1."); + bounds.setLowerBoundForState(originalState, explorationInformation, storm::utility::one()); + explorationInformation.addTerminalState(originalState); } } else if (leavingChoices->empty()) { STORM_LOG_TRACE("MEC's leaving choices are empty."); @@ -395,317 +452,138 @@ namespace storm { // Compute the state of the original model that corresponds to the current state. StateType originalState = relevantStates[stateAndChoices.first]; - STORM_LOG_TRACE("Setting upper bound of state in row group " << stateToRowGroupMapping[originalState] << " to 0."); - upperBoundsPerState[stateToRowGroupMapping[originalState]] = storm::utility::zero(); - terminalStates.insert(originalState); + STORM_LOG_TRACE("Setting upper bound of state in row group " << explorationInformation.getRowGroup(originalState) << " to 0."); + bounds.setUpperBoundForState(originalState, explorationInformation, storm::utility::zero()); + explorationInformation.addTerminalState(originalState); } } } } - + template - bool SparseMdpLearningModelChecker::exploreState(storm::generator::PrismNextStateGenerator& generator, std::function const& stateToIdCallback, StateType const& currentStateId, storm::generator::CompressedState const& compressedState, StateType const& unexploredMarker, boost::container::flat_set& terminalStates, std::vector>>& matrix, std::vector& rowGroupIndices, std::vector& stateToRowGroupMapping, storm::expressions::Expression const& targetStateExpression, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, Statistics& stats) { - - bool isTerminalState = false; - bool isTargetState = false; - - // Map the unexplored state to a row group. - stateToRowGroupMapping[currentStateId] = rowGroupIndices.size() - 1; - STORM_LOG_TRACE("Assigning row group " << stateToRowGroupMapping[currentStateId] << " to state " << currentStateId << "."); - lowerBoundsPerState.push_back(storm::utility::zero()); - upperBoundsPerState.push_back(storm::utility::one()); - - // Before generating the behavior of the state, we need to determine whether it's a target state that - // does not need to be expanded. - generator.load(compressedState); - if (generator.satisfies(targetStateExpression)) { - ++stats.numberOfTargetStates; - isTargetState = true; - isTerminalState = true; - } else { - STORM_LOG_TRACE("Exploring state."); - - // If it needs to be expanded, we use the generator to retrieve the behavior of the new state. - storm::generator::StateBehavior behavior = generator.expand(stateToIdCallback); - STORM_LOG_TRACE("State has " << behavior.getNumberOfChoices() << " choices."); - - // Clumsily check whether we have found a state that forms a trivial BMEC. - if (behavior.getNumberOfChoices() == 0) { - isTerminalState = true; - } else if (behavior.getNumberOfChoices() == 1) { - auto const& onlyChoice = *behavior.begin(); - if (onlyChoice.size() == 1) { - auto const& onlyEntry = *onlyChoice.begin(); - if (onlyEntry.first == currentStateId) { - isTerminalState = true; - } - } - } - - // If the state was neither a trivial (non-accepting) terminal state nor a target state, we - // need to store its behavior. - if (!isTerminalState) { - // Next, we insert the behavior into our matrix structure. - StateType startRow = matrix.size(); - matrix.resize(startRow + behavior.getNumberOfChoices()); - - // Terminate the row group. - rowGroupIndices.push_back(matrix.size()); - - uint32_t currentAction = 0; - for (auto const& choice : behavior) { - for (auto const& entry : choice) { - std::cout << "adding " << currentStateId << " -> " << entry.first << " with prob " << entry.second << std::endl; - matrix[startRow + currentAction].emplace_back(entry.first, entry.second); - } - - lowerBoundsPerAction.push_back(storm::utility::zero()); - upperBoundsPerAction.push_back(storm::utility::one()); - - std::pair values = computeValuesOfChoice(startRow + currentAction, matrix, stateToRowGroupMapping, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); - lowerBoundsPerAction.back() = values.first; - upperBoundsPerAction.back() = values.second; - - STORM_LOG_TRACE("Initializing bounds of action " << (startRow + currentAction) << " to " << lowerBoundsPerAction.back() << " and " << upperBoundsPerAction.back() << "."); - - ++currentAction; - } - - std::pair values = computeValuesOfState(currentStateId, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); - lowerBoundsPerState.back() = values.first; - upperBoundsPerState.back() = values.second; - - STORM_LOG_TRACE("Initializing bounds of state " << currentStateId << " to " << lowerBoundsPerState.back() << " and " << upperBoundsPerState.back() << "."); - } + ValueType SparseMdpLearningModelChecker::computeLowerBoundOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const { + ValueType result = storm::utility::zero(); + for (auto const& element : explorationInformation.getRowOfMatrix(action)) { + result += element.getValue() * bounds.getLowerBoundForState(element.getColumn(), explorationInformation); } - - if (isTerminalState) { - STORM_LOG_TRACE("State does not need to be explored, because it is " << (isTargetState ? "a target state" : "a rejecting terminal state") << "."); - terminalStates.insert(currentStateId); - - if (isTargetState) { - lowerBoundsPerState.back() = storm::utility::one(); - lowerBoundsPerAction.push_back(storm::utility::one()); - upperBoundsPerAction.push_back(storm::utility::one()); - } else { - upperBoundsPerState.back() = storm::utility::zero(); - lowerBoundsPerAction.push_back(storm::utility::zero()); - upperBoundsPerAction.push_back(storm::utility::zero()); - } - - // Increase the size of the matrix, but leave the row empty. - matrix.resize(matrix.size() + 1); - - // Terminate the row group. - rowGroupIndices.push_back(matrix.size()); + return result; + } + + template + ValueType SparseMdpLearningModelChecker::computeUpperBoundOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const { + ValueType result = storm::utility::zero(); + for (auto const& element : explorationInformation.getRowOfMatrix(action)) { + result += element.getValue() * bounds.getUpperBoundForState(element.getColumn(), explorationInformation); } - - return isTerminalState; + return result; } template - bool SparseMdpLearningModelChecker::samplePathFromState(storm::generator::PrismNextStateGenerator& generator, std::function const& stateToIdCallback, StateType initialStateIndex, std::vector>& stateActionStack, std::unordered_map& unexploredStates, StateType const& unexploredMarker, boost::container::flat_set& terminalStates, std::vector>>& matrix, std::vector& rowGroupIndices, std::vector& stateToRowGroupMapping, std::unordered_map& stateToLeavingChoicesOfEndComponent, storm::expressions::Expression const& targetStateExpression, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, Statistics& stats) { - - // Start the search from the initial state. - stateActionStack.push_back(std::make_pair(initialStateIndex, 0)); - - bool foundTerminalState = false; - while (!foundTerminalState) { - StateType const& currentStateId = stateActionStack.back().first; - STORM_LOG_TRACE("State on top of stack is: " << currentStateId << "."); - - // If the state is not yet explored, we need to retrieve its behaviors. - auto unexploredIt = unexploredStates.find(currentStateId); - if (unexploredIt != unexploredStates.end()) { - STORM_LOG_TRACE("State was not yet explored."); - - // Explore the previously unexplored state. - foundTerminalState = exploreState(generator, stateToIdCallback, currentStateId, unexploredIt->second); - unexploredStates.erase(unexploredIt); - } else { - // If the state was already explored, we check whether it is a terminal state or not. - if (terminalStates.find(currentStateId) != terminalStates.end()) { - STORM_LOG_TRACE("Found already explored terminal state: " << currentStateId << "."); - foundTerminalState = true; - } - } - - // If the state was not a terminal state, we continue the path search and sample the next state. - if (!foundTerminalState) { - std::cout << "(2) stack is:" << std::endl; - for (auto const& el : stateActionStack) { - std::cout << el.first << "-[" << el.second << "]-> "; - } - std::cout << std::endl; - - for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) { - if (stateToRowGroupMapping[state] != unexploredMarker) { - std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [" << lowerBoundsPerState[stateToRowGroupMapping[state]] << ", " << upperBoundsPerState[stateToRowGroupMapping[state]] << "], actions: "; - for (auto choice = rowGroupIndices[stateToRowGroupMapping[state]]; choice < rowGroupIndices[stateToRowGroupMapping[state] + 1]; ++choice) { - std::cout << choice << " = [" << lowerBoundsPerAction[choice] << ", " << upperBoundsPerAction[choice] << "], "; - } - std::cout << std::endl; - } else { - std::cout << "state " << state << " is unexplored" << std::endl; - } - } - - // At this point, we can be sure that the state was expanded and that we can sample according to the - // probabilities in the matrix. - uint32_t chosenAction = sampleFromMaxActions(currentStateId, matrix, rowGroupIndices, stateToRowGroupMapping, upperBoundsPerState, stateToLeavingChoicesOfEndComponent, unexploredMarker); - stateActionStack.back().second = chosenAction; - STORM_LOG_TRACE("Sampled action " << chosenAction << " in state " << currentStateId << "."); - - StateType successor = sampleSuccessorFromAction(chosenAction, matrix, rowGroupIndices, stateToRowGroupMapping); - STORM_LOG_TRACE("Sampled successor " << successor << " according to action " << chosenAction << " of state " << currentStateId << "."); - - // Put the successor state and a dummy action on top of the stack. - stateActionStack.emplace_back(successor, 0); - stats.maxPathLength = std::max(stats.maxPathLength, stateActionStack.size()); - - // If the current path length exceeds the threshold and the model is a nondeterministic one, we - // perform an EC detection. - if (stateActionStack.size() > stats.pathLengthUntilEndComponentDetection && !program.isDeterministicModel()) { - detectEndComponents(stateActionStack, terminalStates, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, stateToLeavingChoicesOfEndComponent, unexploredMarker); - - // Abort the current search. - STORM_LOG_TRACE("Aborting the search after EC detection."); - stateActionStack.clear(); - break; - } - } + ValueType SparseMdpLearningModelChecker::computeLowerBoundOfState(StateType const& state, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const { + StateType group = explorationInformation.getRowGroup(state); + ValueType result = std::make_pair(storm::utility::zero(), storm::utility::zero()); + for (ActionType action = explorationInformation.getStartRowOfGroup(group); action < explorationInformation.getStartRowOfGroup(group + 1); ++action) { + ValueType actionValue = computeLowerBoundOfAction(action, explorationInformation, bounds); + result = std::max(actionValue, result); } - - return foundTerminalState; + return result; + } + + template + ValueType SparseMdpLearningModelChecker::computeUpperBoundOfState(StateType const& state, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const { + StateType group = explorationInformation.getRowGroup(state); + ValueType result = std::make_pair(storm::utility::zero(), storm::utility::zero()); + for (ActionType action = explorationInformation.getStartRowOfGroup(group); action < explorationInformation.getStartRowOfGroup(group + 1); ++action) { + ValueType actionValue = computeUpperBoundOfAction(action, explorationInformation, bounds); + result = std::max(actionValue, result); + } + return result; } template - std::tuple::StateType, ValueType, ValueType> SparseMdpLearningModelChecker::performLearningProcedure(storm::expressions::Expression const& targetStateExpression, storm::storage::sparse::StateStorage& stateStorage, storm::generator::PrismNextStateGenerator& generator, std::function const& stateToIdCallback, std::vector>>& matrix, std::vector& rowGroupIndices, std::vector& stateToRowGroupMapping, std::unordered_map& unexploredStates, StateType const& unexploredMarker) { - - // Generate the initial state so we know where to start the simulation. - stateStorage.initialStateIndices = generator.getInitialStates(stateToIdCallback); - STORM_LOG_THROW(stateStorage.initialStateIndices.size() == 1, storm::exceptions::NotSupportedException, "Currently only models with one initial state are supported by the learning engine."); - StateType initialStateIndex = stateStorage.initialStateIndices.front(); - - // A set storing all states in which to terminate the search. - boost::container::flat_set terminalStates; - - // Vectors to store the lower/upper bounds for each action (in each state). - std::vector lowerBoundsPerAction; - std::vector upperBoundsPerAction; - std::vector lowerBoundsPerState; - std::vector upperBoundsPerState; - - // Since we might run into end-components, we track a mapping from states in ECs to all leaving choices of - // that EC. - std::unordered_map stateToLeavingChoicesOfEndComponent; - - // Now perform the actual sampling. - std::vector> stateActionStack; - - Statistics stats; - bool convergenceCriterionMet = false; - while (!convergenceCriterionMet) { - bool result = samplePathFromState(generator, stateToIdCallback, initialStateIndex, stateActionStack, unexploredStates, unexploredMarker, terminalStates, matrix, rowGroupIndices, stateToRowGroupMapping, stateToLeavingChoicesOfEndComponent, targetStateExpression, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, stats); - - // If a terminal state was found, we update the probabilities along the path contained in the stack. - if (result) { - // Update the bounds along the path to the terminal state. - STORM_LOG_TRACE("Found terminal state, updating probabilities along path."); - updateProbabilitiesUsingStack(stateActionStack, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); - } else { - // If not terminal state was found, the search aborted, possibly because of an EC-detection. In this - // case, we cannot update the probabilities. - STORM_LOG_TRACE("Did not find terminal state."); - } - - // Sanity check of results. - for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) { - if (stateToRowGroupMapping[state] != unexploredMarker) { - STORM_LOG_ASSERT(lowerBoundsPerState[stateToRowGroupMapping[state]] <= upperBoundsPerState[stateToRowGroupMapping[state]], "The bounds for state " << state << " are not in a sane relation: " << lowerBoundsPerState[stateToRowGroupMapping[state]] << " > " << upperBoundsPerState[stateToRowGroupMapping[state]] << "."); - } - } - -// TODO: remove debug output when superfluous -// for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) { -// if (stateToRowGroupMapping[state] != unexploredMarker) { -// std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [" << lowerBoundsPerState[stateToRowGroupMapping[state]] << ", " << upperBoundsPerState[stateToRowGroupMapping[state]] << "], actions: "; -// for (auto choice = rowGroupIndices[stateToRowGroupMapping[state]]; choice < rowGroupIndices[stateToRowGroupMapping[state] + 1]; ++choice) { -// std::cout << choice << " = [" << lowerBoundsPerAction[choice] << ", " << upperBoundsPerAction[choice] << "], "; -// } -// std::cout << std::endl; -// } else { -// std::cout << "state " << state << " is unexplored" << std::endl; -// } -// } - - STORM_LOG_DEBUG("Discovered states: " << stateStorage.numberOfStates << " (" << unexploredStates.size() << " unexplored)."); - STORM_LOG_DEBUG("Value of initial state is in [" << lowerBoundsPerState[initialStateIndex] << ", " << upperBoundsPerState[initialStateIndex] << "]."); - ValueType difference = upperBoundsPerState[initialStateIndex] - lowerBoundsPerState[initialStateIndex]; - STORM_LOG_DEBUG("Difference after iteration " << stats.iterations << " is " << difference << "."); - convergenceCriterionMet = difference < 1e-6; - - ++stats.iterations; + std::pair SparseMdpLearningModelChecker::computeBoundsOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const { + // TODO: take into account self-loops? + std::pair result = std::make_pair(storm::utility::zero(), storm::utility::zero()); + for (auto const& element : explorationInformation.getRowOfMatrix(action)) { + result.first += element.getValue() * bounds.getLowerBoundForState(element.getColumn(), explorationInformation); + result.second += element.getValue() * bounds.getUpperBoundForState(element.getColumn(), explorationInformation); } - - if (storm::settings::generalSettings().isShowStatisticsSet()) { - std::cout << std::endl << "Learning summary -------------------------" << std::endl; - std::cout << "Discovered states: " << stateStorage.numberOfStates << " (" << unexploredStates.size() << " unexplored, " << stats.numberOfTargetStates << " target states)" << std::endl; - std::cout << "Sampling iterations: " << stats.iterations << std::endl; - std::cout << "Maximal path length: " << stats.maxPathLength << std::endl; + return result; + } + + template + std::pair SparseMdpLearningModelChecker::computeBoundsOfState(StateType const& currentStateId, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const { + StateType group = explorationInformation.getRowGroup(currentStateId); + std::pair result = std::make_pair(storm::utility::zero(), storm::utility::zero()); + for (ActionType action = explorationInformation.getStartRowOfGroup(group); action < explorationInformation.getStartRowOfGroup(group + 1); ++action) { + std::pair actionValues = computeBoundsOfAction(action, explorationInformation, bounds); + result.first = std::max(actionValues.first, result.first); + result.second = std::max(actionValues.second, result.second); } - - return std::make_tuple(initialStateIndex, lowerBoundsPerState[initialStateIndex], upperBoundsPerState[initialStateIndex]); + return result; } template - std::unique_ptr SparseMdpLearningModelChecker::computeReachabilityProbabilities(CheckTask const& checkTask) { - storm::logic::EventuallyFormula const& eventuallyFormula = checkTask.getFormula(); - storm::logic::Formula const& subformula = eventuallyFormula.getSubformula(); - STORM_LOG_THROW(subformula.isAtomicExpressionFormula() || subformula.isAtomicLabelFormula(), storm::exceptions::NotSupportedException, "Learning engine can only deal with formulas of the form 'F \"label\"' or 'F expression'."); - - // Derive the expression for the target states, so we know when to abort the learning run. - storm::expressions::Expression targetStateExpression = getTargetStateExpression(subformula); - - // A container for the encountered states. - storm::storage::sparse::StateStorage stateStorage(variableInformation.getTotalBitOffset(true)); + void SparseMdpLearningModelChecker::updateProbabilityBoundsAlongSampledPath(StateActionStack& stack, ExplorationInformation const& explorationInformation, BoundValues& bounds) const { + std::cout << "stack is:" << std::endl; + for (auto const& el : stack) { + std::cout << el.first << "-[" << el.second << "]-> "; + } + std::cout << std::endl; - // A generator used to explore the model. - storm::generator::PrismNextStateGenerator generator(program, variableInformation, false); + stack.pop_back(); + while (!stack.empty()) { + updateProbabilityOfAction(stack.back().first, stack.back().second, explorationInformation, bounds); + stack.pop_back(); + } + } + + template + void SparseMdpLearningModelChecker::updateProbabilityOfAction(StateType const& state, ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues& bounds) const { + // Compute the new lower/upper values of the action. + std::pair newBoundsForAction = computeBoundsOfAction(action, explorationInformation, bounds); - // A container that stores the transitions found so far. - std::vector>> matrix; + // And set them as the current value. + bounds.setBoundForAction(action, newBoundsForAction); - // A vector storing where the row group of each state starts. - std::vector rowGroupIndices; - rowGroupIndices.push_back(0); + // Check if we need to update the values for the states. + bounds.setNewLowerBoundOfStateIfGreaterThanOld(state, explorationInformation, newBoundsForAction.first); - // A vector storing the mapping from state ids to row groups. - std::vector stateToRowGroupMapping; - StateType unexploredMarker = std::numeric_limits::max(); + StateType rowGroup = explorationInformation.getRowGroup(state); + if (newBoundsForAction < bounds.getUpperBoundOfRowGroup(rowGroup)) { + + } - // A mapping of unexplored IDs to their actual compressed states. - std::unordered_map unexploredStates; + ValueType upperBound = computeUpperBoundOverAllOtherActions(state, action, explorationInformation, bounds); - // Create a callback for the next-state generator to enable it to request the index of states. - std::function stateToIdCallback = [&stateStorage, &stateToRowGroupMapping, &unexploredStates, &unexploredMarker] (storm::generator::CompressedState const& state) -> StateType { - StateType newIndex = stateStorage.numberOfStates; - - // Check, if the state was already registered. - std::pair actualIndexBucketPair = stateStorage.stateToId.findOrAddAndGetBucket(state, newIndex); - - if (actualIndexBucketPair.first == newIndex) { - ++stateStorage.numberOfStates; - stateToRowGroupMapping.push_back(unexploredMarker); - unexploredStates[newIndex] = state; + uint32_t sourceRowGroup = stateToRowGroupMapping[sourceStateId]; + if (newUpperValue < upperBoundsPerState[sourceRowGroup]) { + if (rowGroupIndices[sourceRowGroup + 1] - rowGroupIndices[sourceRowGroup] > 1) { + ValueType max = storm::utility::zero(); + + for (uint32_t currentAction = rowGroupIndices[sourceRowGroup]; currentAction < rowGroupIndices[sourceRowGroup + 1]; ++currentAction) { + std::cout << "cur: " << currentAction << std::endl; + if (currentAction == action) { + continue; + } + + ValueType currentValue = storm::utility::zero(); + for (auto const& element : transitionMatrix[currentAction]) { + currentValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + } + max = std::max(max, currentValue); + std::cout << "max is " << max << std::endl; + } + + newUpperValue = std::max(newUpperValue, max); } - return actualIndexBucketPair.first; - }; - - // Compute and return result. - std::tuple boundsForInitialState = performLearningProcedure(targetStateExpression, stateStorage, generator, stateToIdCallback, matrix, rowGroupIndices, stateToRowGroupMapping, unexploredStates, unexploredMarker); - return std::make_unique>(std::get<0>(boundsForInitialState), std::get<1>(boundsForInitialState)); + if (newUpperValue < upperBoundsPerState[sourceRowGroup]) { + STORM_LOG_TRACE("Got new upper bound for state " << sourceStateId << ": " << newUpperValue << " (was " << upperBoundsPerState[sourceRowGroup] << ")."); + std::cout << "writing at index " << sourceRowGroup << std::endl; + upperBoundsPerState[sourceRowGroup] = newUpperValue; + } + } } template class SparseMdpLearningModelChecker; diff --git a/src/modelchecker/reachability/SparseMdpLearningModelChecker.h b/src/modelchecker/reachability/SparseMdpLearningModelChecker.h index 5bc8adb40..777caf2d9 100644 --- a/src/modelchecker/reachability/SparseMdpLearningModelChecker.h +++ b/src/modelchecker/reachability/SparseMdpLearningModelChecker.h @@ -6,7 +6,9 @@ #include "src/modelchecker/AbstractModelChecker.h" #include "src/storage/prism/Program.h" +#include "src/storage/sparse/StateStorage.h" +#include "src/generator/PrismNextStateGenerator.h" #include "src/generator/CompressedState.h" #include "src/generator/VariableInformation.h" @@ -27,53 +29,289 @@ namespace storm { } namespace modelchecker { - + template class SparseMdpLearningModelChecker : public AbstractModelChecker { public: typedef uint32_t StateType; - typedef boost::container::flat_set ChoiceSet; - typedef std::shared_ptr ChoiceSetPointer; + typedef uint32_t ActionType; + typedef boost::container::flat_set StateSet; + typedef boost::container::flat_set ActionSet; + typedef std::shared_ptr ActionSetPointer; + typedef std::vector> StateActionStack; SparseMdpLearningModelChecker(storm::prism::Program const& program, boost::optional> const& constantDefinitions); virtual bool canHandle(CheckTask const& checkTask) const override; - + virtual std::unique_ptr computeReachabilityProbabilities(CheckTask const& checkTask) override; private: + // A struct that keeps track of certain statistics during the computation. struct Statistics { - Statistics() : iterations(0), maxPathLength(0), numberOfTargetStates(0), pathLengthUntilEndComponentDetection(27) { + Statistics() : iterations(0), maxPathLength(0), numberOfTargetStates(0), numberOfExploredStates(0), pathLengthUntilEndComponentDetection(27) { // Intentionally left empty. } + std::size_t iterations; std::size_t maxPathLength; std::size_t numberOfTargetStates; + std::size_t numberOfExploredStates; std::size_t pathLengthUntilEndComponentDetection; }; - - bool exploreState(storm::generator::PrismNextStateGenerator& generator, std::function const& stateToIdCallback, StateType const& currentStateId, storm::generator::CompressedState const& compressedState, StateType const& unexploredMarker, boost::container::flat_set& terminalStates, std::vector>>& matrix, std::vector& rowGroupIndices, std::vector& stateToRowGroupMapping, storm::expressions::Expression const& targetStateExpression, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, Statistics& stats); - bool samplePathFromState(storm::generator::PrismNextStateGenerator& generator, std::function const& stateToIdCallback, StateType initialStateIndex, std::vector>& stateActionStack, std::unordered_map& unexploredStates, StateType const& unexploredMarker, boost::container::flat_set& terminalStates, std::vector>>& matrix, std::vector& rowGroupIndices, std::vector& stateToRowGroupMapping, std::unordered_map& stateToLeavingChoicesOfEndComponent, storm::expressions::Expression const& targetStateExpression, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, Statistics& stats); + // A struct containing the data required for state exploration. + struct StateGeneration { + StateGeneration(storm::generator::PrismNextStateGenerator&& generator, storm::expressions::Expression const& targetStateExpression) : generator(std::move(generator)), targetStateExpression(targetStateExpression) { + // Intentionally left empty. + } + + std::vector getInitialStates() { + return generator.getInitialStates(stateToIdCallback); + } + + storm::generator::StateBehavior expand() { + return generator.expand(stateToIdCallback); + } + + bool isTargetState() const { + return generator.satisfies(targetStateExpression); + } + + storm::generator::PrismNextStateGenerator generator; + std::function stateToIdCallback; + storm::expressions::Expression targetStateExpression; + }; + + // A structure containing the data assembled during exploration. + struct ExplorationInformation { + ExplorationInformation(uint_fast64_t bitsPerBucket, ActionType const& unexploredMarker = std::numeric_limits::max()) : stateStorage(bitsPerBucket), unexploredMarker(unexploredMarker) { + // Intentionally left empty. + } + + storm::storage::sparse::StateStorage stateStorage; + + std::vector>> matrix; + std::vector rowGroupIndices; + + std::vector stateToRowGroupMapping; + StateType unexploredMarker; + std::unordered_map unexploredStates; + + storm::OptimizationDirection optimizationDirection; + StateSet terminalStates; + + std::unordered_map stateToLeavingChoicesOfEndComponent; + + void setInitialStates(std::vector const& initialStates) { + stateStorage.initialStateIndices = initialStates; + } + + StateType getFirstInitialState() const { + return stateStorage.initialStateIndices.front(); + } + + std::size_t getNumberOfInitialStates() const { + return stateStorage.initialStateIndices.size(); + } + + void addUnexploredState(storm::generator::CompressedState const& compressedState) { + stateToRowGroupMapping.push_back(unexploredMarker); + unexploredStates[stateStorage.numberOfStates] = compressedState; + ++stateStorage.numberOfStates; + } + + void assignStateToRowGroup(StateType const& state, ActionType const& rowGroup) { + stateToRowGroupMapping[state] = rowGroup; + } + + void assignStateToNextRowGroup(StateType const& state) { + stateToRowGroupMapping[state] = rowGroupIndices.size() - 1; + } + + void newRowGroup(ActionType const& action) { + rowGroupIndices.push_back(action); + } + + void newRowGroup() { + newRowGroup(matrix.size()); + } + + std::size_t getNumberOfUnexploredStates() const { + return unexploredStates.size(); + } + + std::size_t getNumberOfDiscoveredStates() const { + return stateStorage.numberOfStates; + } + + StateType const& getRowGroup(StateType const& state) const { + return stateToRowGroupMapping[state]; + } + + StateType const& getUnexploredMarker() const { + return unexploredMarker; + } + + bool isUnexplored(StateType const& state) const { + return unexploredStates.find(state) == unexploredStates.end(); + } + + bool isTerminal(StateType const& state) const { + return terminalStates.find(state) != terminalStates.end(); + } + + ActionType const& getStartRowOfGroup(StateType const& group) const { + return rowGroupIndices[group]; + } + + void addTerminalState(StateType const& state) { + terminalStates.insert(state); + } + + std::vector>& getRowOfMatrix(ActionType const& row) { + return matrix[row]; + } + + std::vector> const& getRowOfMatrix(ActionType const& row) const { + return matrix[row]; + } + + void addRowsToMatrix(std::size_t const& count) { + matrix.resize(matrix.size() + count); + } + }; + + // A struct containg the lower and upper bounds per state and action. + struct BoundValues { + std::vector lowerBoundsPerState; + std::vector upperBoundsPerState; + std::vector lowerBoundsPerAction; + std::vector upperBoundsPerAction; + + std::pair getBoundsForState(StateType const& state, ExplorationInformation const& explorationInformation) const { + ActionType index = explorationInformation.getRowGroup(state); + if (index == explorationInformation.getUnexploredMarker()) { + return std::make_pair(storm::utility::zero(), storm::utility::one()); + } else { + return std::make_pair(lowerBoundsPerState[index], upperBoundsPerState[index]); + } + } + + ValueType getLowerBoundForState(StateType const& state, ExplorationInformation const& explorationInformation) const { + ActionType index = explorationInformation.getRowGroup(state); + if (index == explorationInformation.getUnexploredMarker()) { + return storm::utility::zero(); + } else { + return getLowerBoundForRowGroup(index, explorationInformation); + } + } + + ValueType getLowerBoundForRowGroup(StateType const& rowGroup, ExplorationInformation const& explorationInformation) const { + return lowerBoundsPerState[rowGroup]; + } + + ValueType getUpperBoundForState(StateType const& state, ExplorationInformation const& explorationInformation) const { + ActionType index = explorationInformation.getRowGroup(state); + if (index == explorationInformation.getUnexploredMarker()) { + return storm::utility::one(); + } else { + return getUpperBoundForRowGroup(index, explorationInformation); + } + } + + ValueType getUpperBoundForRowGroup(StateType const& rowGroup, ExplorationInformation const& explorationInformation) const { + return upperBoundsPerState[rowGroup]; + } + + std::pair getBoundsForAction(ActionType const& action) const { + return std::make_pair(lowerBoundsPerAction[action], upperBoundsPerAction[action]); + } + + ValueType const& getLowerBoundForAction(ActionType const& action) const { + return lowerBoundsPerAction[action]; + } + + ValueType const& getUpperBoundForAction(ActionType const& action) const { + return upperBoundsPerAction[action]; + } + + ValueType getDifferenceOfStateBounds(StateType const& state, ExplorationInformation const& explorationInformation) { + std::pair bounds = getBoundsForState(state, explorationInformation); + return bounds.second - bounds.first; + } + + void initializeStateBoundsForNextState(std::pair const& vals = std::pair(storm::utility::zero(), storm::utility::one())) { + lowerBoundsPerState.push_back(vals.first); + upperBoundsPerState.push_back(vals.second); + } + + void initializeActionBoundsForNextAction(std::pair const& vals = std::pair(storm::utility::zero(), storm::utility::one())) { + lowerBoundsPerAction.push_back(vals.first); + upperBoundsPerAction.push_back(vals.second); + } + + void setLowerBoundForState(StateType const& state, ExplorationInformation const& explorationInformation, ValueType const& value) { + lowerBoundsPerState[explorationInformation.getRowGroup(state)] = value; + } + + void setUpperBoundForState(StateType const& state, ExplorationInformation const& explorationInformation, ValueType const& value) { + upperBoundsPerState[explorationInformation.getRowGroup(state)] = value; + } + + void setBoundsForAction(ActionType const& action, std::pair const& values) { + lowerBoundsPerAction[action] = values.first; + upperBoundsPerAction[action] = values.second; + } + + void setBoundsForState(StateType const& state, ExplorationInformation const& explorationInformation, std::pair const& values) { + StateType const& rowGroup = explorationInformation.getRowGroup(state); + lowerBoundsPerState[rowGroup] = values.first; + upperBoundsPerState[rowGroup] = values.second; + } + + bool setNewLowerBoundOfStateIfGreaterThanOld(StateType const& state, ExplorationInformation const& explorationInformation, ValueType const& newLowerValue) { + StateType const& rowGroup = explorationInformation.getRowGroup(state); + if (lowerBoundsPerState[rowGroup] < newLowerValue) { + lowerBoundsPerState[rowGroup] = newLowerValue; + } + } + + bool setNewUpperBoundOfStateIfLessThanOld(StateType const& state, ExplorationInformation const& explorationInformation, ValueType const& newUpperValue) { + StateType const& rowGroup = explorationInformation.getRowGroup(state); + if (newUpperValue < upperBoundsPerState[rowGroup]) { + upperBoundsPerState[rowGroup] = newUpperValue; + } + } + }; - std::tuple performLearningProcedure(storm::expressions::Expression const& targetStateExpression, storm::storage::sparse::StateStorage& stateStorage, storm::generator::PrismNextStateGenerator& generator, std::function const& stateToIdCallback, std::vector>>& matrix, std::vector& rowGroupIndices, std::vector& stateToRowGroupMapping, std::unordered_map& unexploredStates, StateType const& unexploredMarker); + storm::expressions::Expression getTargetStateExpression(storm::logic::Formula const& subformula) const; - void updateProbabilities(StateType const& sourceStateId, uint32_t action, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBounds, std::vector& upperBounds, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, StateType const& unexploredMarker) const; + std::function createStateToIdCallback(ExplorationInformation& explorationInformation) const; - void updateProbabilitiesUsingStack(std::vector>& stateActionStack, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBounds, std::vector& upperBounds, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, StateType const& unexploredMarker) const; + std::tuple performLearningProcedure(StateGeneration& stateGeneration, ExplorationInformation& explorationInformation) const; + + bool samplePathFromState(StateGeneration& stateGeneration, ExplorationInformation& explorationInformation, StateActionStack& stack, BoundValues& bounds, Statistics& stats) const; - uint32_t sampleFromMaxActions(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector const& upperBoundsPerState, std::unordered_map const& stateToLeavingChoicesOfEndComponent, StateType const& unexploredMarker); + bool exploreState(StateGeneration& stateGeneration, StateType const& currentStateId, storm::generator::CompressedState const& currentState, ExplorationInformation& explorationInformation, BoundValues& bounds, Statistics& stats) const; - StateType sampleSuccessorFromAction(StateType chosenAction, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping); + uint32_t sampleMaxAction(StateType const& currentStateId, ExplorationInformation const& explorationInformation, BoundValues& bounds) const; + + StateType sampleSuccessorFromAction(ActionType const& chosenAction, ExplorationInformation const& explorationInformation) const; - void detectEndComponents(std::vector> const& stateActionStack, boost::container::flat_set& terminalStates, std::vector>>& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, std::unordered_map& stateToLeavingChoicesOfEndComponent, StateType const& unexploredMarker) const; + void detectEndComponents(StateActionStack const& stack, ExplorationInformation& explorationInformation, BoundValues& bounds) const; - std::pair computeValuesOfChoice(uint32_t action, std::vector>> const& transitionMatrix, std::vector const& stateToRowGroupMapping, std::vector const& lowerBoundsPerState, std::vector const& upperBoundsPerState, StateType const& unexploredMarker); + void updateProbabilityBoundsAlongSampledPath(StateActionStack& stack, ExplorationInformation const& explorationInformation, BoundValues& bounds) const; + + void updateProbabilityOfAction(StateType const& state, ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues& bounds) const; - std::pair computeValuesOfState(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector const& lowerBounds, std::vector const& upperBounds, std::vector const& lowerBoundsPerState, std::vector const& upperBoundsPerState, StateType const& unexploredMarker); + std::pair computeBoundsOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const; + std::pair computeBoundsOfState(StateType const& currentStateId, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const; + ValueType computeLowerBoundOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const; + ValueType computeUpperBoundOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const; + ValueType computeLowerBoundOfState(StateType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const; + ValueType computeUpperBoundOfState(StateType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const; - storm::expressions::Expression getTargetStateExpression(storm::logic::Formula const& subformula); - // The program that defines the model to check. storm::prism::Program program;