#include "src/modelchecker/reachability/SparseMdpLearningModelChecker.h" #include "src/storage/SparseMatrix.h" #include "src/storage/MaximalEndComponentDecomposition.h" #include "src/logic/FragmentSpecification.h" #include "src/utility/prism.h" #include "src/modelchecker/results/ExplicitQuantitativeCheckResult.h" #include "src/models/sparse/StandardRewardModel.h" #include "src/settings/SettingsManager.h" #include "src/settings/modules/GeneralSettings.h" #include "src/settings/modules/LearningSettings.h" #include "src/utility/macros.h" #include "src/exceptions/InvalidOperationException.h" #include "src/exceptions/InvalidPropertyException.h" #include "src/exceptions/NotSupportedException.h" namespace storm { namespace modelchecker { template SparseMdpLearningModelChecker::SparseMdpLearningModelChecker(storm::prism::Program const& program, boost::optional> const& constantDefinitions) : program(storm::utility::prism::preprocessProgram(program, constantDefinitions)), variableInformation(this->program), randomGenerator(std::chrono::system_clock::now().time_since_epoch().count()), comparator() { // Intentionally left empty. } template bool SparseMdpLearningModelChecker::canHandle(CheckTask const& checkTask) const { storm::logic::Formula const& formula = checkTask.getFormula(); storm::logic::FragmentSpecification fragment = storm::logic::propositional().setProbabilityOperatorsAllowed(true).setReachabilityProbabilityFormulasAllowed(true).setNestedOperatorsAllowed(false); return formula.isInFragment(fragment) && checkTask.isOnlyInitialStatesRelevantSet(); } template std::unique_ptr SparseMdpLearningModelChecker::computeReachabilityProbabilities(CheckTask const& checkTask) { storm::logic::EventuallyFormula const& eventuallyFormula = checkTask.getFormula(); storm::logic::Formula const& subformula = eventuallyFormula.getSubformula(); STORM_LOG_THROW(program.isDeterministicModel() || checkTask.isOptimizationDirectionSet(), storm::exceptions::InvalidPropertyException, "For nondeterministic systems, an optimization direction (min/max) must be given in the property."); StateGeneration stateGeneration(program, variableInformation, getTargetStateExpression(subformula)); ExplorationInformation explorationInformation(variableInformation.getTotalBitOffset(true), storm::settings::learningSettings().isLocalECDetectionSet()); explorationInformation.optimizationDirection = checkTask.isOptimizationDirectionSet() ? checkTask.getOptimizationDirection() : storm::OptimizationDirection::Maximize; // The first row group starts at action 0. explorationInformation.newRowGroup(0); // Create a callback for the next-state generator to enable it to request the index of states. stateGeneration.stateToIdCallback = createStateToIdCallback(explorationInformation); // Compute and return result. 
            std::tuple<StateType, ValueType, ValueType> boundsForInitialState = performLearningProcedure(stateGeneration, explorationInformation);
            return std::make_unique<ExplicitQuantitativeCheckResult<ValueType>>(std::get<0>(boundsForInitialState), std::get<1>(boundsForInitialState));
        }
        
        template<typename ValueType>
        storm::expressions::Expression SparseMdpLearningModelChecker<ValueType>::getTargetStateExpression(storm::logic::Formula const& subformula) const {
            std::shared_ptr<storm::logic::Formula> preparedSubformula = subformula.substitute(program.getLabelToExpressionMapping());
            storm::expressions::Expression result;
            try {
                result = preparedSubformula->toExpression();
            } catch (storm::exceptions::InvalidOperationException const& e) {
                STORM_LOG_THROW(false, storm::exceptions::InvalidPropertyException, "The property refers to unknown labels.");
            }
            return result;
        }
        
        template<typename ValueType>
        std::function<typename SparseMdpLearningModelChecker<ValueType>::StateType (storm::generator::CompressedState const&)> SparseMdpLearningModelChecker<ValueType>::createStateToIdCallback(ExplorationInformation& explorationInformation) const {
            return [&explorationInformation] (storm::generator::CompressedState const& state) -> StateType {
                StateType newIndex = explorationInformation.stateStorage.numberOfStates;
                
                // Check, if the state was already registered.
                std::pair<StateType, std::size_t> actualIndexBucketPair = explorationInformation.stateStorage.stateToId.findOrAddAndGetBucket(state, newIndex);
                
                if (actualIndexBucketPair.first == newIndex) {
                    explorationInformation.addUnexploredState(state);
                }
                
                return actualIndexBucketPair.first;
            };
        }
        
        template<typename ValueType>
        std::tuple<typename SparseMdpLearningModelChecker<ValueType>::StateType, ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::performLearningProcedure(StateGeneration& stateGeneration, ExplorationInformation& explorationInformation) const {
            // Generate the initial state so we know where to start the simulation.
            explorationInformation.setInitialStates(stateGeneration.getInitialStates());
            STORM_LOG_THROW(explorationInformation.getNumberOfInitialStates() == 1, storm::exceptions::NotSupportedException, "Currently only models with one initial state are supported by the learning engine.");
            StateType initialStateIndex = explorationInformation.getFirstInitialState();
            
            // Create a structure that holds the bounds for the states and actions.
            BoundValues bounds;
            
            // Create a stack that is used to track the path we sampled.
            StateActionStack stack;
            
            // Now perform the actual sampling.
            Statistics stats;
            bool convergenceCriterionMet = false;
            while (!convergenceCriterionMet) {
                bool result = samplePathFromInitialState(stateGeneration, explorationInformation, stack, bounds, stats);
                
                // If a terminal state was found, we update the probabilities along the path contained in the stack.
                if (result) {
                    // Update the bounds along the path to the terminal state.
                    STORM_LOG_TRACE("Found terminal state, updating probabilities along path.");
                    updateProbabilityBoundsAlongSampledPath(stack, explorationInformation, bounds);
                } else {
                    // If no terminal state was found, the search aborted, possibly because of an EC-detection. In this
                    // case, we cannot update the probabilities.
STORM_LOG_TRACE("Did not find terminal state."); } STORM_LOG_DEBUG("Discovered states: " << explorationInformation.getNumberOfDiscoveredStates() << " (" << stats.numberOfExploredStates << " explored, " << explorationInformation.getNumberOfUnexploredStates() << " unexplored)."); STORM_LOG_DEBUG("Value of initial state is in [" << bounds.getLowerBoundForState(initialStateIndex, explorationInformation) << ", " << bounds.getUpperBoundForState(initialStateIndex, explorationInformation) << "]."); ValueType difference = bounds.getDifferenceOfStateBounds(initialStateIndex, explorationInformation); STORM_LOG_DEBUG("Difference after iteration " << stats.iterations << " is " << difference << "."); convergenceCriterionMet = comparator.isZero(difference); ++stats.iterations; } if (storm::settings::generalSettings().isShowStatisticsSet()) { std::cout << std::endl << "Learning summary -------------------------" << std::endl; std::cout << "Discovered states: " << explorationInformation.getNumberOfDiscoveredStates() << " (" << stats.numberOfExploredStates << " explored, " << explorationInformation.getNumberOfUnexploredStates() << " unexplored, " << stats.numberOfTargetStates << " target)" << std::endl; std::cout << "Sampled paths: " << stats.iterations << std::endl; std::cout << "Maximal path length: " << stats.maxPathLength << std::endl; std::cout << "EC detections: " << stats.ecDetections << " (" << stats.failedEcDetections << " failed, " << stats.totalNumberOfEcDetected << " EC(s) detected)" << std::endl; } return std::make_tuple(initialStateIndex, bounds.getLowerBoundForState(initialStateIndex, explorationInformation), bounds.getUpperBoundForState(initialStateIndex, explorationInformation)); } template bool SparseMdpLearningModelChecker::samplePathFromInitialState(StateGeneration& stateGeneration, ExplorationInformation& explorationInformation, StateActionStack& stack, BoundValues& bounds, Statistics& stats) const { // Start the search from the initial state. stack.push_back(std::make_pair(explorationInformation.getFirstInitialState(), 0)); // As long as we didn't find a terminal (accepting or rejecting) state in the search, sample a new successor. bool foundTerminalState = false; while (!foundTerminalState) { StateType const& currentStateId = stack.back().first; STORM_LOG_TRACE("State on top of stack is: " << currentStateId << "."); // If the state is not yet explored, we need to retrieve its behaviors. auto unexploredIt = explorationInformation.unexploredStates.find(currentStateId); if (unexploredIt != explorationInformation.unexploredStates.end()) { STORM_LOG_TRACE("State was not yet explored."); // Explore the previously unexplored state. storm::generator::CompressedState const& compressedState = unexploredIt->second; foundTerminalState = exploreState(stateGeneration, currentStateId, compressedState, explorationInformation, bounds, stats); explorationInformation.unexploredStates.erase(unexploredIt); } else { // If the state was already explored, we check whether it is a terminal state or not. if (explorationInformation.isTerminal(currentStateId)) { STORM_LOG_TRACE("Found already explored terminal state: " << currentStateId << "."); foundTerminalState = true; } } // If the state was not a terminal state, we continue the path search and sample the next state. if (!foundTerminalState) { // At this point, we can be sure that the state was expanded and that we can sample according to the // probabilities in the matrix. 
                    uint32_t chosenAction = sampleActionOfState(currentStateId, explorationInformation, bounds);
                    stack.back().second = chosenAction;
                    STORM_LOG_TRACE("Sampled action " << chosenAction << " in state " << currentStateId << ".");
                    StateType successor = sampleSuccessorFromAction(chosenAction, explorationInformation);
                    STORM_LOG_TRACE("Sampled successor " << successor << " according to action " << chosenAction << " of state " << currentStateId << ".");
                    
                    // Put the successor state and a dummy action on top of the stack.
                    stack.emplace_back(successor, 0);
                    stats.maxPathLength = std::max(stats.maxPathLength, stack.size());
                    
                    // If the current path length exceeds the threshold and the model is a nondeterministic one, we
                    // perform an EC detection.
                    if (stack.size() > explorationInformation.getPathLengthUntilEndComponentDetection()) {
                        bool success = detectEndComponents(stack, explorationInformation, bounds, stats);
                        
                        // Only if the detection found an EC, we abort the search.
                        if (success) {
                            // Abort the current search.
                            STORM_LOG_TRACE("Aborting the search after successful EC detection.");
                            stack.clear();
                            break;
                        }
                    }
                }
            }
            
            return foundTerminalState;
        }
        
        template<typename ValueType>
        bool SparseMdpLearningModelChecker<ValueType>::exploreState(StateGeneration& stateGeneration, StateType const& currentStateId, storm::generator::CompressedState const& currentState, ExplorationInformation& explorationInformation, BoundValues& bounds, Statistics& stats) const {
            bool isTerminalState = false;
            bool isTargetState = false;
            
            ++stats.numberOfExploredStates;
            
            // Finally, map the unexplored state to the row group.
            explorationInformation.assignStateToNextRowGroup(currentStateId);
            STORM_LOG_TRACE("Assigning row group " << explorationInformation.getRowGroup(currentStateId) << " to state " << currentStateId << ".");
            
            // Initialize the bounds, because some of the following computations depend on the values to be available for
            // all states that have been assigned to a row-group.
            bounds.initializeBoundsForNextState();
            
            // Before generating the behavior of the state, we need to determine whether it's a target state that
            // does not need to be expanded.
            stateGeneration.generator.load(currentState);
            if (stateGeneration.isTargetState()) {
                ++stats.numberOfTargetStates;
                isTargetState = true;
                isTerminalState = true;
            } else {
                STORM_LOG_TRACE("Exploring state.");
                
                // If it needs to be expanded, we use the generator to retrieve the behavior of the new state.
                storm::generator::StateBehavior<ValueType, StateType> behavior = stateGeneration.expand();
                STORM_LOG_TRACE("State has " << behavior.getNumberOfChoices() << " choices.");
                
                // Clumsily check whether we have found a state that forms a trivial BMEC.
                bool otherSuccessor = false;
                for (auto const& choice : behavior) {
                    for (auto const& entry : choice) {
                        if (entry.first != currentStateId) {
                            otherSuccessor = true;
                            break;
                        }
                    }
                }
                isTerminalState = !otherSuccessor;
                
                // If the state was neither a trivial (non-accepting) terminal state nor a target state, we
                // need to store its behavior.
                if (!isTerminalState) {
                    // Next, we insert the behavior into our matrix structure.
                    StateType startRow = explorationInformation.matrix.size();
                    explorationInformation.addRowsToMatrix(behavior.getNumberOfChoices());
                    
                    ActionType currentAction = 0;
                    
                    // Retrieve the lowest state bounds (wrt. to the current optimization direction).
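                    // These are 0 when maximizing and 1 when minimizing, so that folding the bounds of the
                    // individual actions into them via combineBounds yields the extremal bounds over all actions.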
                    std::pair<ValueType, ValueType> stateBounds = getLowestBounds(explorationInformation.optimizationDirection);
                    
                    for (auto const& choice : behavior) {
                        for (auto const& entry : choice) {
                            explorationInformation.getRowOfMatrix(startRow + currentAction).emplace_back(entry.first, entry.second);
                        }
                        
                        std::pair<ValueType, ValueType> actionBounds = computeBoundsOfAction(startRow + currentAction, explorationInformation, bounds);
                        bounds.initializeBoundsForNextAction(actionBounds);
                        stateBounds = combineBounds(explorationInformation.optimizationDirection, stateBounds, actionBounds);
                        
                        STORM_LOG_TRACE("Initializing bounds of action " << (startRow + currentAction) << " to " << bounds.getLowerBoundForAction(startRow + currentAction) << " and " << bounds.getUpperBoundForAction(startRow + currentAction) << ".");
                        
                        ++currentAction;
                    }
                    
                    // Terminate the row group.
                    explorationInformation.rowGroupIndices.push_back(explorationInformation.matrix.size());
                    
                    bounds.setBoundsForState(currentStateId, explorationInformation, stateBounds);
                    STORM_LOG_TRACE("Initializing bounds of state " << currentStateId << " to " << bounds.getLowerBoundForState(currentStateId, explorationInformation) << " and " << bounds.getUpperBoundForState(currentStateId, explorationInformation) << ".");
                }
            }
            
            if (isTerminalState) {
                STORM_LOG_TRACE("State does not need to be explored, because it is " << (isTargetState ? "a target state" : "a rejecting terminal state") << ".");
                explorationInformation.addTerminalState(currentStateId);
                
                if (isTargetState) {
                    bounds.setBoundsForState(currentStateId, explorationInformation, std::make_pair(storm::utility::one<ValueType>(), storm::utility::one<ValueType>()));
                    bounds.initializeBoundsForNextAction(std::make_pair(storm::utility::one<ValueType>(), storm::utility::one<ValueType>()));
                } else {
                    bounds.setBoundsForState(currentStateId, explorationInformation, std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>()));
                    bounds.initializeBoundsForNextAction(std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>()));
                }
                
                // Increase the size of the matrix, but leave the row empty.
                explorationInformation.addRowsToMatrix(1);
                
                // Terminate the row group.
                explorationInformation.newRowGroup();
            }
            
            return isTerminalState;
        }
        
        template<typename ValueType>
        typename SparseMdpLearningModelChecker<ValueType>::ActionType SparseMdpLearningModelChecker<ValueType>::sampleActionOfState(StateType const& currentStateId, ExplorationInformation const& explorationInformation, BoundValues& bounds) const {
            // Determine the values of all available actions.
            std::vector<std::pair<ActionType, ValueType>> actionValues;
            StateType rowGroup = explorationInformation.getRowGroup(currentStateId);
            
            // Check for cases in which we do not need to perform more work.
            if (explorationInformation.onlyOneActionAvailable(rowGroup)) {
                return explorationInformation.getStartRowOfGroup(rowGroup);
            }
            
            // If there are more choices to consider, start by gathering the values of relevant actions.
            STORM_LOG_TRACE("Sampling from actions leaving the state.");
            for (uint32_t row = explorationInformation.getStartRowOfGroup(rowGroup); row < explorationInformation.getStartRowOfGroup(rowGroup + 1); ++row) {
                actionValues.push_back(std::make_pair(row, bounds.getBoundForAction(explorationInformation.optimizationDirection, row)));
            }
            
            STORM_LOG_ASSERT(!actionValues.empty(), "Values for actions must not be empty.");
            
            // Sort the actions wrt. to the optimization direction.
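            // Actions that (approximately) share the best bound are collected below and one of them is then drawn
            // uniformly at random.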
            if (explorationInformation.maximize()) {
                std::sort(actionValues.begin(), actionValues.end(), [] (std::pair<ActionType, ValueType> const& a, std::pair<ActionType, ValueType> const& b) { return a.second > b.second; });
            } else {
                std::sort(actionValues.begin(), actionValues.end(), [] (std::pair<ActionType, ValueType> const& a, std::pair<ActionType, ValueType> const& b) { return a.second < b.second; });
            }
            
            // Determine the first elements of the sorted range that agree on their value.
            auto end = ++actionValues.begin();
            while (end != actionValues.end() && comparator.isEqual(actionValues.begin()->second, end->second)) {
                ++end;
            }
            
            // Now sample from all maximizing actions.
            std::uniform_int_distribution<ActionType> distribution(0, std::distance(actionValues.begin(), end) - 1);
            return actionValues[distribution(randomGenerator)].first;
        }
        
        template<typename ValueType>
        typename SparseMdpLearningModelChecker<ValueType>::StateType SparseMdpLearningModelChecker<ValueType>::sampleSuccessorFromAction(ActionType const& chosenAction, ExplorationInformation const& explorationInformation) const {
            // TODO: precompute this?
            std::vector<ValueType> probabilities(explorationInformation.getRowOfMatrix(chosenAction).size());
            std::transform(explorationInformation.getRowOfMatrix(chosenAction).begin(), explorationInformation.getRowOfMatrix(chosenAction).end(), probabilities.begin(), [] (storm::storage::MatrixEntry<StateType, ValueType> const& entry) { return entry.getValue(); });
            
            // Now sample according to the probabilities.
            std::discrete_distribution<StateType> distribution(probabilities.begin(), probabilities.end());
            StateType offset = distribution(randomGenerator);
            return explorationInformation.getRowOfMatrix(chosenAction)[offset].getColumn();
        }
        
        template<typename ValueType>
        bool SparseMdpLearningModelChecker<ValueType>::detectEndComponents(StateActionStack const& stack, ExplorationInformation& explorationInformation, BoundValues& bounds, Statistics& stats) const {
            STORM_LOG_TRACE("Starting " << (explorationInformation.useLocalECDetection() ? "local" : "global") << " EC detection.");
            ++stats.ecDetections;
            
            // Outline:
            // 1. construct a sparse transition matrix of the relevant part of the state space.
            // 2. use this matrix to compute an MEC decomposition.
            // 3. if non-empty, analyze the decomposition for accepting/rejecting MECs.
            
            // Start with 1.
            storm::storage::SparseMatrixBuilder<ValueType> builder(0, 0, 0, false, true, 0);
            
            // Determine the set of states that was expanded.
            std::vector<StateType> relevantStates;
            if (explorationInformation.useLocalECDetection()) {
                for (auto const& stateActionPair : stack) {
                    if (explorationInformation.maximize() || !comparator.isOne(bounds.getLowerBoundForState(stateActionPair.first, explorationInformation))) {
                        relevantStates.push_back(stateActionPair.first);
                    }
                }
                std::sort(relevantStates.begin(), relevantStates.end());
                auto newEnd = std::unique(relevantStates.begin(), relevantStates.end());
                relevantStates.resize(std::distance(relevantStates.begin(), newEnd));
            } else {
                for (StateType state = 0; state < explorationInformation.stateStorage.numberOfStates; ++state) {
                    // Add the state to the relevant states if it has already been explored. Additionally, if we are
                    // computing minimal probabilities, we only consider it relevant if it's not a target state.
                    if (!explorationInformation.isUnexplored(state) && (explorationInformation.maximize() || !comparator.isOne(bounds.getLowerBoundForState(state, explorationInformation)))) {
                        relevantStates.push_back(state);
                    }
                }
            }
            StateType sink = relevantStates.size();
            
            // Create a mapping for faster look-up during the translation of flexible matrix to the real sparse matrix.
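            // All transitions that lead outside the relevant states are later redirected to the extra sink state
            // with index relevantStates.size().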
            std::unordered_map<StateType, StateType> relevantStateToNewRowGroupMapping;
            for (StateType index = 0; index < relevantStates.size(); ++index) {
                relevantStateToNewRowGroupMapping.emplace(relevantStates[index], index);
            }
            
            // Do the actual translation.
            StateType currentRow = 0;
            for (auto const& state : relevantStates) {
                builder.newRowGroup(currentRow);
                StateType rowGroup = explorationInformation.getRowGroup(state);
                for (auto row = explorationInformation.getStartRowOfGroup(rowGroup); row < explorationInformation.getStartRowOfGroup(rowGroup + 1); ++row) {
                    ValueType unexpandedProbability = storm::utility::zero<ValueType>();
                    for (auto const& entry : explorationInformation.getRowOfMatrix(row)) {
                        auto it = relevantStateToNewRowGroupMapping.find(entry.getColumn());
                        if (it != relevantStateToNewRowGroupMapping.end()) {
                            // If the entry is a relevant state, we copy it over (and compensate for the offset change).
                            builder.addNextValue(currentRow, it->second, entry.getValue());
                        } else {
                            // If the entry is an unexpanded state, we gather the probability to later redirect it to an unexpanded sink.
                            unexpandedProbability += entry.getValue();
                        }
                    }
                    if (unexpandedProbability != storm::utility::zero<ValueType>()) {
                        builder.addNextValue(currentRow, sink, unexpandedProbability);
                    }
                    ++currentRow;
                }
            }
            // Then, make the unexpanded state absorbing.
            builder.newRowGroup(currentRow);
            builder.addNextValue(currentRow, sink, storm::utility::one<ValueType>());
            STORM_LOG_TRACE("Successfully built matrix for MEC decomposition.");
            
            // Go on to step 2.
            storm::storage::SparseMatrix<ValueType> relevantStatesMatrix = builder.build();
            storm::storage::MaximalEndComponentDecomposition<ValueType> mecDecomposition(relevantStatesMatrix, relevantStatesMatrix.transpose(true));
            STORM_LOG_TRACE("Successfully computed MEC decomposition. Found " << (mecDecomposition.size() > 1 ? (mecDecomposition.size() - 1) : 0) << " MEC(s).");
            
            // If the decomposition contains only the MEC consisting of the sink state, we count it as 'failed'.
            if (mecDecomposition.size() <= 1) {
                ++stats.failedEcDetections;
                // explorationInformation.increasePathLengthUntilEndComponentDetection();
                return false;
            } else {
                stats.totalNumberOfEcDetected += mecDecomposition.size() - 1;
                
                // 3. Analyze the MEC decomposition.
                for (auto const& mec : mecDecomposition) {
                    // Ignore the (expected) MEC of the sink state.
                    if (mec.containsState(sink)) {
                        continue;
                    }
                    
                    if (explorationInformation.maximize()) {
                        analyzeMecForMaximalProbabilities(mec, relevantStates, relevantStatesMatrix, explorationInformation, bounds);
                    } else {
                        analyzeMecForMinimalProbabilities(mec, relevantStates, relevantStatesMatrix, explorationInformation, bounds);
                    }
                }
            }
            return true;
        }
        
        template<typename ValueType>
        void SparseMdpLearningModelChecker<ValueType>::analyzeMecForMaximalProbabilities(storm::storage::MaximalEndComponent const& mec, std::vector<StateType> const& relevantStates, storm::storage::SparseMatrix<ValueType> const& relevantStatesMatrix, ExplorationInformation& explorationInformation, BoundValues& bounds) const {
            // For maximal probabilities, we check (a) which MECs contain a target state, because the associated states
            // have a probability of 1 (and we can therefore set their lower bounds to 1) and (b) which of the remaining
            // MECs have no outgoing action, because the associated states have a probability of 0 (and we can therefore
            // set their upper bounds to 0).
            
            bool containsTargetState = false;
            
            // Now we record all choices leaving the EC.
            std::vector<ActionType> leavingActions;
            for (auto const& stateAndChoices : mec) {
                // Compute the state of the original model that corresponds to the current state.
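                // The MEC refers to states by their index in the auxiliary matrix, which is their position in
                // relevantStates, so we translate back to the original state here.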
                StateType originalState = relevantStates[stateAndChoices.first];
                StateType originalRowGroup = explorationInformation.getRowGroup(originalState);
                
                // Check whether a target state is contained in the MEC.
                if (!containsTargetState && comparator.isOne(bounds.getLowerBoundForRowGroup(originalRowGroup))) {
                    containsTargetState = true;
                }
                
                // For each state, compute the actions that leave the MEC.
                auto includedChoicesIt = stateAndChoices.second.begin();
                auto includedChoicesIte = stateAndChoices.second.end();
                for (auto action = explorationInformation.getStartRowOfGroup(originalRowGroup); action < explorationInformation.getStartRowOfGroup(originalRowGroup + 1); ++action) {
                    if (includedChoicesIt != includedChoicesIte) {
                        STORM_LOG_TRACE("Next (local) choice contained in MEC is " << (*includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first]));
                        STORM_LOG_TRACE("Current (local) choice iterated is " << (action - explorationInformation.getStartRowOfGroup(originalRowGroup)));
                        if (action - explorationInformation.getStartRowOfGroup(originalRowGroup) != *includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first]) {
                            STORM_LOG_TRACE("Choice leaves the EC.");
                            leavingActions.push_back(action);
                        } else {
                            STORM_LOG_TRACE("Choice stays in the EC.");
                            ++includedChoicesIt;
                        }
                    } else {
                        STORM_LOG_TRACE("Choice leaves the EC, because there is no more choice staying in the EC.");
                        leavingActions.push_back(action);
                    }
                }
            }
            
            // If one of the states of the EC is a target state, all states in the EC have probability 1.
            if (containsTargetState) {
                STORM_LOG_TRACE("MEC contains a target state.");
                for (auto const& stateAndChoices : mec) {
                    // Compute the state of the original model that corresponds to the current state.
                    StateType const& originalState = relevantStates[stateAndChoices.first];
                    
                    STORM_LOG_TRACE("Setting lower bound of state in row group " << explorationInformation.getRowGroup(originalState) << " to 1.");
                    bounds.setLowerBoundForState(originalState, explorationInformation, storm::utility::one<ValueType>());
                    explorationInformation.addTerminalState(originalState);
                }
            } else if (leavingActions.empty()) {
                STORM_LOG_TRACE("MEC's leaving choices are empty.");
                
                // If there is no choice leaving the EC, but it contains no target state, all states have probability 0.
                for (auto const& stateAndChoices : mec) {
                    // Compute the state of the original model that corresponds to the current state.
                    StateType const& originalState = relevantStates[stateAndChoices.first];
                    
                    STORM_LOG_TRACE("Setting upper bound of state in row group " << explorationInformation.getRowGroup(originalState) << " to 0.");
                    bounds.setUpperBoundForState(originalState, explorationInformation, storm::utility::zero<ValueType>());
                    explorationInformation.addTerminalState(originalState);
                }
            } else {
                // In this case, no target state is contained in the MEC, but there are actions leaving the MEC. To
                // prevent the simulation getting stuck in this MEC again, we replace all states in the MEC by a new
                // state whose outgoing actions are the ones leaving the MEC. We do this by assigning all states in the
                // MEC to a new row group, which will then hold all the outgoing choices.
                
                // Remap all contained states to the new row group.
                StateType nextRowGroup = explorationInformation.getNextRowGroup();
                for (auto const& stateAndChoices : mec) {
                    explorationInformation.assignStateToRowGroup(stateAndChoices.first, nextRowGroup);
                }
                bounds.initializeBoundsForNextState();
                
                // Add to the new row group all leaving actions of contained states and set the appropriate bounds for
                // the actions and the new state.
                std::pair<ValueType, ValueType> stateBounds = getLowestBounds(explorationInformation.optimizationDirection);
                for (auto const& action : leavingActions) {
                    explorationInformation.matrix.emplace_back(std::move(explorationInformation.matrix[action]));
                    std::pair<ValueType, ValueType> const& actionBounds = bounds.getBoundsForAction(action);
                    bounds.initializeBoundsForNextAction(actionBounds);
                    stateBounds = combineBounds(explorationInformation.optimizationDirection, stateBounds, actionBounds);
                }
                
                // Terminate the row group of the newly introduced state.
                explorationInformation.rowGroupIndices.push_back(explorationInformation.matrix.size());
            }
        }
        
        template<typename ValueType>
        void SparseMdpLearningModelChecker<ValueType>::analyzeMecForMinimalProbabilities(storm::storage::MaximalEndComponent const& mec, std::vector<StateType> const& relevantStates, storm::storage::SparseMatrix<ValueType> const& relevantStatesMatrix, ExplorationInformation& explorationInformation, BoundValues& bounds) const {
            // For minimal probabilities, all found MECs are guaranteed to not contain a target state. Hence, in all
            // associated states, the probability is 0 and we can set the upper bounds of the states to 0.
            for (auto const& stateAndChoices : mec) {
                // Compute the state of the original model that corresponds to the current state.
                StateType originalState = relevantStates[stateAndChoices.first];
                bounds.setUpperBoundForState(originalState, explorationInformation, storm::utility::zero<ValueType>());
                explorationInformation.addTerminalState(originalState);
            }
        }
        
        template<typename ValueType>
        ValueType SparseMdpLearningModelChecker<ValueType>::computeLowerBoundOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
            ValueType result = storm::utility::zero<ValueType>();
            for (auto const& element : explorationInformation.getRowOfMatrix(action)) {
                result += element.getValue() * bounds.getLowerBoundForState(element.getColumn(), explorationInformation);
            }
            return result;
        }
        
        template<typename ValueType>
        ValueType SparseMdpLearningModelChecker<ValueType>::computeUpperBoundOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
            ValueType result = storm::utility::zero<ValueType>();
            for (auto const& element : explorationInformation.getRowOfMatrix(action)) {
                result += element.getValue() * bounds.getUpperBoundForState(element.getColumn(), explorationInformation);
            }
            return result;
        }
        
        template<typename ValueType>
        std::pair<ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::computeBoundsOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
            // TODO: take into account self-loops?
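            // The bounds of an action are the sums of the successor bounds weighted by the transition probabilities.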
            std::pair<ValueType, ValueType> result = std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>());
            for (auto const& element : explorationInformation.getRowOfMatrix(action)) {
                result.first += element.getValue() * bounds.getLowerBoundForState(element.getColumn(), explorationInformation);
                result.second += element.getValue() * bounds.getUpperBoundForState(element.getColumn(), explorationInformation);
            }
            return result;
        }
        
        template<typename ValueType>
        std::pair<ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::computeBoundsOfState(StateType const& currentStateId, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
            StateType group = explorationInformation.getRowGroup(currentStateId);
            std::pair<ValueType, ValueType> result = getLowestBounds(explorationInformation.optimizationDirection);
            for (ActionType action = explorationInformation.getStartRowOfGroup(group); action < explorationInformation.getStartRowOfGroup(group + 1); ++action) {
                std::pair<ValueType, ValueType> actionValues = computeBoundsOfAction(action, explorationInformation, bounds);
                result = combineBounds(explorationInformation.optimizationDirection, result, actionValues);
            }
            return result;
        }
        
        template<typename ValueType>
        void SparseMdpLearningModelChecker<ValueType>::updateProbabilityBoundsAlongSampledPath(StateActionStack& stack, ExplorationInformation const& explorationInformation, BoundValues& bounds) const {
            stack.pop_back();
            while (!stack.empty()) {
                updateProbabilityOfAction(stack.back().first, stack.back().second, explorationInformation, bounds);
                stack.pop_back();
            }
        }
        
        template<typename ValueType>
        void SparseMdpLearningModelChecker<ValueType>::updateProbabilityOfAction(StateType const& state, ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues& bounds) const {
            // Compute the new lower/upper values of the action.
            std::pair<ValueType, ValueType> newBoundsForAction = computeBoundsOfAction(action, explorationInformation, bounds);
            
            // And set them as the current value.
            bounds.setBoundsForAction(action, newBoundsForAction);
            
            // Check if we need to update the values for the states.
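            // When maximizing, the lower bound of the state can only increase; the upper bound may only be lowered
            // after also considering all other actions of the state (and vice versa when minimizing).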
            if (explorationInformation.maximize()) {
                bounds.setLowerBoundOfStateIfGreaterThanOld(state, explorationInformation, newBoundsForAction.first);
                
                StateType rowGroup = explorationInformation.getRowGroup(state);
                if (newBoundsForAction.second < bounds.getUpperBoundForRowGroup(rowGroup)) {
                    if (explorationInformation.getRowGroupSize(rowGroup) > 1) {
                        newBoundsForAction.second = std::max(newBoundsForAction.second, computeBoundOverAllOtherActions(storm::OptimizationDirection::Maximize, state, action, explorationInformation, bounds));
                    }
                    
                    bounds.setUpperBoundForRowGroup(rowGroup, newBoundsForAction.second);
                }
            } else {
                bounds.setUpperBoundOfStateIfLessThanOld(state, explorationInformation, newBoundsForAction.second);
                
                StateType rowGroup = explorationInformation.getRowGroup(state);
                if (bounds.getLowerBoundForRowGroup(rowGroup) < newBoundsForAction.first) {
                    if (explorationInformation.getRowGroupSize(rowGroup) > 1) {
                        ValueType min = computeBoundOverAllOtherActions(storm::OptimizationDirection::Minimize, state, action, explorationInformation, bounds);
                        newBoundsForAction.first = std::min(newBoundsForAction.first, min);
                    }
                    
                    bounds.setLowerBoundForRowGroup(rowGroup, newBoundsForAction.first);
                }
            }
        }
        
        template<typename ValueType>
        ValueType SparseMdpLearningModelChecker<ValueType>::computeBoundOverAllOtherActions(storm::OptimizationDirection const& direction, StateType const& state, ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
            ValueType bound = getLowestBound(explorationInformation.optimizationDirection);
            
            ActionType group = explorationInformation.getRowGroup(state);
            for (auto currentAction = explorationInformation.getStartRowOfGroup(group); currentAction < explorationInformation.getStartRowOfGroup(group + 1); ++currentAction) {
                if (currentAction == action) {
                    continue;
                }
                
                if (direction == storm::OptimizationDirection::Maximize) {
                    bound = std::max(bound, computeUpperBoundOfAction(currentAction, explorationInformation, bounds));
                } else {
                    bound = std::min(bound, computeLowerBoundOfAction(currentAction, explorationInformation, bounds));
                }
            }
            return bound;
        }
        
        template<typename ValueType>
        std::pair<ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::getLowestBounds(storm::OptimizationDirection const& direction) const {
            ValueType val = getLowestBound(direction);
            return std::make_pair(val, val);
        }
        
        template<typename ValueType>
        ValueType SparseMdpLearningModelChecker<ValueType>::getLowestBound(storm::OptimizationDirection const& direction) const {
            if (direction == storm::OptimizationDirection::Maximize) {
                return storm::utility::zero<ValueType>();
            } else {
                return storm::utility::one<ValueType>();
            }
        }
        
        template<typename ValueType>
        std::pair<ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::combineBounds(storm::OptimizationDirection const& direction, std::pair<ValueType, ValueType> const& bounds1, std::pair<ValueType, ValueType> const& bounds2) const {
            if (direction == storm::OptimizationDirection::Maximize) {
                return std::make_pair(std::max(bounds1.first, bounds2.first), std::max(bounds1.second, bounds2.second));
            } else {
                return std::make_pair(std::min(bounds1.first, bounds2.first), std::min(bounds1.second, bounds2.second));
            }
        }
        
        template class SparseMdpLearningModelChecker<double>;
    }
}