From 034cf626a0fb230c30f3c85c5858dcc7cc8ce6ad Mon Sep 17 00:00:00 2001
From: dehnert
Date: Wed, 30 Mar 2016 18:28:49 +0200
Subject: [PATCH] more work on learning-based engine

Former-commit-id: bbcf67abd1ce7381468984b7d6b8bec253872157
---
 .../SparseMdpLearningModelChecker.cpp         | 314 ++++++++++++++----
 .../SparseMdpLearningModelChecker.h           |  13 +-
 2 files changed, 261 insertions(+), 66 deletions(-)

diff --git a/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp b/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp
index f96308e05..d01ed5156 100644
--- a/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp
+++ b/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp
@@ -29,29 +29,124 @@ namespace storm {
         }
 
         template<typename ValueType>
-        void SparseMdpLearningModelChecker<ValueType>::updateProbabilities(StateType const& sourceStateId, uint32_t action, StateType const& targetStateId, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType>& lowerBounds, std::vector<ValueType>& upperBounds) const {
+        void SparseMdpLearningModelChecker<ValueType>::updateProbabilities(StateType const& sourceStateId, uint32_t action, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType>& lowerBoundsPerAction, std::vector<ValueType>& upperBoundsPerAction, std::vector<ValueType>& lowerBoundsPerState, std::vector<ValueType>& upperBoundsPerState, StateType const& unexploredMarker) const {
             // Find out which row of the matrix we have to consider for the given action.
             StateType sourceRowGroup = stateToRowGroupMapping[sourceStateId];
-            StateType sourceRow = sourceRowGroup + action;
+            StateType sourceRow = rowGroupIndices[sourceRowGroup] + action;
 
             // Compute the new lower/upper values of the action.
             ValueType newLowerValue = storm::utility::zero<ValueType>();
             ValueType newUpperValue = storm::utility::zero<ValueType>();
+            boost::optional<ValueType> loopProbability;
             for (auto const& element : transitionMatrix[sourceRow]) {
-                newLowerValue += element.getValue() * upperBounds[stateToRowGroupMapping[element.getColumn()]];
-                newUpperValue += element.getValue() * lowerBounds[stateToRowGroupMapping[element.getColumn()]];
+                // If the element is a self-loop, we treat the probability by a proper rescaling after the loop.
+                if (element.getColumn() == sourceStateId) {
+                    STORM_LOG_TRACE("Found self-loop with probability " << element.getValue() << ".");
+                    loopProbability = element.getValue();
+                    continue;
+                }
+
+                newLowerValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::zero<ValueType>() : lowerBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
+                newUpperValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
+            }
+
+            // If there was a self-loop with probability p, we scale the probabilities by 1/(1-p).
+            if (loopProbability) {
+                STORM_LOG_TRACE("Scaling values " << newLowerValue << " and " << newUpperValue << " with " << (storm::utility::one<ValueType>() / (storm::utility::one<ValueType>() - loopProbability.get())) << ".");
+                newLowerValue = newLowerValue / (storm::utility::one<ValueType>() - loopProbability.get());
+                newUpperValue = newUpperValue / (storm::utility::one<ValueType>() - loopProbability.get());
             }
 
             // And set them as the current value.
-            lowerBounds[stateToRowGroupMapping[sourceStateId]] = newLowerValue;
-            upperBounds[stateToRowGroupMapping[sourceStateId]] = newUpperValue;
+            lowerBoundsPerAction[rowGroupIndices[stateToRowGroupMapping[sourceStateId]] + action] = newLowerValue;
+            STORM_LOG_TRACE("Updating lower value of action " << action << " of state " << sourceStateId << " to " << newLowerValue << ".");
+            upperBoundsPerAction[rowGroupIndices[stateToRowGroupMapping[sourceStateId]] + action] = newUpperValue;
+            std::cout << "writing " << newUpperValue << " at index " << (rowGroupIndices[stateToRowGroupMapping[sourceStateId]] + action) << std::endl;
+            STORM_LOG_TRACE("Updating upper value of action " << action << " of state " << sourceStateId << " to " << newUpperValue << ".");
+
+            // Check if we need to update the values for the states.
+            if (lowerBoundsPerState[stateToRowGroupMapping[sourceStateId]] < newLowerValue) {
+                lowerBoundsPerState[stateToRowGroupMapping[sourceStateId]] = newLowerValue;
+                STORM_LOG_TRACE("Got new lower bound for state " << sourceStateId << ": " << newLowerValue << ".");
+            }
+            if (upperBoundsPerState[stateToRowGroupMapping[sourceStateId]] > newUpperValue) {
+                upperBoundsPerState[stateToRowGroupMapping[sourceStateId]] = newUpperValue;
+                STORM_LOG_TRACE("Got new upper bound for state " << sourceStateId << ": " << newUpperValue << ".");
+            }
+        }
+
+        template<typename ValueType>
+        void SparseMdpLearningModelChecker<ValueType>::updateProbabilitiesUsingStack(std::vector<std::pair<StateType, uint32_t>>& stateActionStack, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType>& lowerBoundsPerAction, std::vector<ValueType>& upperBoundsPerAction, std::vector<ValueType>& lowerBoundsPerState, std::vector<ValueType>& upperBoundsPerState, StateType const& unexploredMarker) const {
+
+            std::cout << "stack:" << std::endl;
+            for (auto const& entry : stateActionStack) {
+                std::cout << entry.first << " -[" << entry.second << "]-> ";
+            }
+            std::cout << std::endl;
+
+            while (stateActionStack.size() > 1) {
+                stateActionStack.pop_back();
+
+                updateProbabilities(stateActionStack.back().first, stateActionStack.back().second, transitionMatrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker);
+
+            }
         }
 
         template<typename ValueType>
-        void SparseMdpLearningModelChecker<ValueType>::updateProbabilitiesUsingStack(std::vector<std::pair<StateType, uint32_t>>& stateActionStack, StateType const& currentStateId, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType>& lowerBounds, std::vector<ValueType>& upperBounds) const {
-            while (!stateActionStack.empty()) {
-                updateProbabilities(stateActionStack.back().first, stateActionStack.back().second, currentStateId, transitionMatrix, rowGroupIndices, stateToRowGroupMapping, lowerBounds, upperBounds);
+        uint32_t SparseMdpLearningModelChecker<ValueType>::sampleFromMaxActions(StateType currentStateId, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType>& upperBoundsPerState, StateType const& unexploredMarker) {
+
+            StateType rowGroup = stateToRowGroupMapping[currentStateId];
+            STORM_LOG_TRACE("Row group for action sampling is " << rowGroup << ".");
+
+            // First, determine all maximizing actions.
+            std::vector<uint32_t> allMaxActions;
+
+            // Determine the maximal value of any action.
+//            ValueType max = 0;
+//            for (uint32_t row = rowGroupIndices[rowGroup]; row < rowGroupIndices[rowGroup + 1]; ++row) {
+//                ValueType current = 0;
+//                for (auto const& element : transitionMatrix[row]) {
+//                    current += element.getValue() * upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]];
+//                }
+//
+//                max = std::max(max, current);
+//            }
+
+            STORM_LOG_TRACE("Looking for action with value " << upperBoundsPerState[stateToRowGroupMapping[currentStateId]] << ".");
+            for (uint32_t row = rowGroupIndices[rowGroup]; row < rowGroupIndices[rowGroup + 1]; ++row) {
+                ValueType current = 0;
+                for (auto const& element : transitionMatrix[row]) {
+                    std::cout << "+= " << element.getValue() << " * " << (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]) << " (col: " << element.getColumn() << " // row (grp) " << stateToRowGroupMapping[element.getColumn()] << ")" << std::endl;
+                    current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
+                }
+                STORM_LOG_TRACE("Computed (upper) bound " << current << " for row " << row << ".");
+
+                // If the action is one of the maximizing ones, insert it into our list.
+                // TODO: should this need to be an approximate check?
+                if (current == upperBoundsPerState[stateToRowGroupMapping[currentStateId]]) {
+                    allMaxActions.push_back(row);
+                    std::cout << "found maximizing action " << row << std::endl;
+                }
             }
+
+            STORM_LOG_ASSERT(!allMaxActions.empty(), "Must have at least one maximizing action.");
+
+            // Now sample from all maximizing actions.
+            std::uniform_int_distribution<uint32_t> distribution(0, allMaxActions.size() - 1);
+            return allMaxActions[distribution(generator)] - rowGroupIndices[rowGroup];
+        }
+
+        template<typename ValueType>
+        typename SparseMdpLearningModelChecker<ValueType>::StateType SparseMdpLearningModelChecker<ValueType>::sampleSuccessorFromAction(StateType currentStateId, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping) {
+            uint32_t row = rowGroupIndices[stateToRowGroupMapping[currentStateId]];
+
+            // TODO: this can be precomputed.
+            std::vector<ValueType> probabilities(transitionMatrix[row].size());
+            std::transform(transitionMatrix[row].begin(), transitionMatrix[row].end(), probabilities.begin(), [] (storm::storage::MatrixEntry<StateType, ValueType> const& entry) { return entry.getValue(); } );
+
+            // Now sample according to the probabilities.
+            std::discrete_distribution<uint32_t> distribution(probabilities.begin(), probabilities.end());
+            return transitionMatrix[row][distribution(generator)].getColumn();
         }
 
         template<typename ValueType>
@@ -74,22 +169,26 @@ namespace storm {
             // A container that stores the transitions found so far.
             std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> matrix;
-            
+
             // A vector storing where the row group of each state starts.
             std::vector<StateType> rowGroupIndices;
+            rowGroupIndices.push_back(0);
 
             // A vector storing the mapping from state ids to row groups.
             std::vector<StateType> stateToRowGroupMapping;
+            StateType unexploredMarker = std::numeric_limits<StateType>::max();
 
             // Vectors to store the lower/upper bounds for each action (in each state).
-            std::vector<ValueType> lowerBounds;
-            std::vector<ValueType> upperBounds;
-            
+            std::vector<ValueType> lowerBoundsPerAction;
+            std::vector<ValueType> upperBoundsPerAction;
+            std::vector<ValueType> lowerBoundsPerState;
+            std::vector<ValueType> upperBoundsPerState;
+
             // A mapping of unexplored IDs to their actual compressed states.
             std::unordered_map<StateType, storm::generator::CompressedState> unexploredStates;
 
             // Create a callback for the next-state generator to enable it to request the index of states.
-            std::function<StateType (storm::generator::CompressedState const&)> stateToIdCallback = [&stateStorage, &stateToRowGroupMapping, &unexploredStates] (storm::generator::CompressedState const& state) -> StateType {
+            std::function<StateType (storm::generator::CompressedState const&)> stateToIdCallback = [&stateStorage, &stateToRowGroupMapping, &unexploredStates, &unexploredMarker] (storm::generator::CompressedState const& state) -> StateType {
                 StateType newIndex = stateStorage.numberOfStates;
 
                 // Check, if the state was already registered.
@@ -97,84 +196,171 @@ namespace storm {
                 if (actualIndexBucketPair.first == newIndex) {
                     ++stateStorage.numberOfStates;
-                    stateToRowGroupMapping.push_back(0);
+                    stateToRowGroupMapping.push_back(unexploredMarker);
                     unexploredStates[newIndex] = state;
                 }
 
                 return actualIndexBucketPair.first;
             };
-            
+
             stateStorage.initialStateIndices = generator.getInitialStates(stateToIdCallback);
             STORM_LOG_THROW(stateStorage.initialStateIndices.size() == 1, storm::exceptions::NotSupportedException, "Currently only models with one initial state are supported by the learning engine.");
 
             // Now perform the actual sampling.
             std::vector<std::pair<StateType, uint32_t>> stateActionStack;
-            stateActionStack.push_back(std::make_pair(stateStorage.initialStateIndices.front(), 0));
-
-            bool foundTargetState = false;
-            while (!foundTargetState) {
-                StateType const& currentStateId = stateActionStack.back().first;
-                STORM_LOG_TRACE("State on top of stack is: " << currentStateId << ".");
-
-                // If the state is not yet expanded, we need to retrieve its behaviors.
-                auto unexploredIt = unexploredStates.find(currentStateId);
-                if (unexploredIt != unexploredStates.end()) {
-                    STORM_LOG_TRACE("State was not yet expanded.");
+            uint_fast64_t iteration = 0;
+            bool convergenceCriterionMet = false;
+            while (!convergenceCriterionMet) {
+                // Start the search from the initial state.
+                stateActionStack.push_back(std::make_pair(stateStorage.initialStateIndices.front(), 0));
 
-                    // First, we need to get the compressed state back from the id.
-                    STORM_LOG_ASSERT(unexploredIt != unexploredStates.end(), "Unable to find unexplored state " << currentStateId << ".");
-                    storm::storage::BitVector const& currentState = unexploredIt->second;
+                bool foundTargetState = false;
+                while (!foundTargetState) {
+                    StateType const& currentStateId = stateActionStack.back().first;
+                    STORM_LOG_TRACE("State on top of stack is: " << currentStateId << ".");
 
-                    // Before generating the behavior of the state, we need to determine whether it's a target state that
-                    // does not need to be expanded.
-                    generator.load(currentState);
-                    if (generator.satisfies(targetStateExpression)) {
-                        STORM_LOG_TRACE("State does not need to be expanded, because it is a target state.");
-
-                        // If it's in fact a goal state, we need to go backwards in the stack and update the probabilities.
-                        foundTargetState = true;
-                        stateActionStack.pop_back();
+                    // If the state is not yet expanded, we need to retrieve its behaviors.
+                    auto unexploredIt = unexploredStates.find(currentStateId);
+                    if (unexploredIt != unexploredStates.end()) {
+                        STORM_LOG_TRACE("State was not yet expanded.");
 
-                        STORM_LOG_TRACE("Updating probabilities along states in stack.");
-                        updateProbabilitiesUsingStack(stateActionStack, currentStateId, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBounds, upperBounds);
-                    } else {
-                        STORM_LOG_TRACE("Expanding state.");
-
-                        // If it needs to be expanded, we use the generator to retrieve the behavior of the new state.
-                        storm::generator::StateBehavior<ValueType, StateType> behavior = generator.expand(stateToIdCallback);
-                        STORM_LOG_TRACE("State has " << behavior.getNumberOfChoices() << " choices.");
+                        // Map the unexplored state to a row group.
+                        stateToRowGroupMapping[currentStateId] = rowGroupIndices.size() - 1;
+                        STORM_LOG_TRACE("Assigning row group " << stateToRowGroupMapping[currentStateId] << " to state " << currentStateId << ".");
+                        lowerBoundsPerState.push_back(storm::utility::zero<ValueType>());
+                        upperBoundsPerState.push_back(storm::utility::one<ValueType>());
 
-                        stateToRowGroupMapping[currentStateId] = rowGroupIndices.size();
-                        rowGroupIndices.push_back(matrix.size());
+                        // We need to get the compressed state back from the id to explore it.
+                        STORM_LOG_ASSERT(unexploredIt != unexploredStates.end(), "Unable to find unexplored state " << currentStateId << ".");
+                        storm::storage::BitVector const& currentState = unexploredIt->second;
 
-                        // Next, we insert the behavior into our matrix structure.
-                        for (auto const& choice : behavior) {
+                        // Before generating the behavior of the state, we need to determine whether it's a target state that
+                        // does not need to be expanded.
+                        generator.load(currentState);
+                        if (generator.satisfies(targetStateExpression)) {
+                            STORM_LOG_TRACE("State does not need to be expanded, because it is a target state.");
+
+                            // If it's in fact a goal state, we need to go backwards in the stack and update the probabilities.
+                            foundTargetState = true;
+                            lowerBoundsPerState.back() = storm::utility::one<ValueType>();
+
+                            // Set the lower/upper bounds for the only (dummy) action.
+                            lowerBoundsPerAction.push_back(storm::utility::one<ValueType>());
+                            upperBoundsPerAction.push_back(storm::utility::one<ValueType>());
+
+                            // Increase the size of the matrix, but leave the row empty.
                             matrix.resize(matrix.size() + 1);
-                            for (auto const& entry : choice) {
-                                matrix.back().push_back(storm::storage::MatrixEntry<StateType, ValueType>(entry.first, entry.second));
+
+                            STORM_LOG_TRACE("Updating probabilities along states in stack.");
+                            updateProbabilitiesUsingStack(stateActionStack, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker);
+                        } else {
+                            STORM_LOG_TRACE("Expanding state.");
+
+                            // If it needs to be expanded, we use the generator to retrieve the behavior of the new state.
+                            storm::generator::StateBehavior<ValueType, StateType> behavior = generator.expand(stateToIdCallback);
+                            STORM_LOG_TRACE("State has " << behavior.getNumberOfChoices() << " choices.");
+
+                            // Clumsily check whether we have found a state that forms a trivial BMEC.
+                            bool isNonacceptingTerminalState = false;
+                            if (behavior.getNumberOfChoices() == 0) {
+                                isNonacceptingTerminalState = true;
+                            } else if (behavior.getNumberOfChoices() == 1) {
+                                auto const& onlyChoice = *behavior.begin();
+                                if (onlyChoice.size() == 1) {
+                                    auto const& onlyEntry = *onlyChoice.begin();
+                                    if (onlyEntry.first == currentStateId) {
+                                        isNonacceptingTerminalState = true;
+                                    }
+                                }
+                            }
+                            if (isNonacceptingTerminalState) {
+                                STORM_LOG_TRACE("State does not need to be expanded, because it forms a trivial BMEC.");
+
+                                foundTargetState = true;
+                                upperBoundsPerState.back() = storm::utility::zero<ValueType>();
+
+                                // Set the lower/upper bounds for the only (dummy) action.
+                                lowerBoundsPerAction.push_back(storm::utility::zero<ValueType>());
+                                upperBoundsPerAction.push_back(storm::utility::zero<ValueType>());
+
+                                // Increase the size of the matrix, but leave the row empty.
+                                matrix.resize(matrix.size() + 1);
+
+                                STORM_LOG_TRACE("Updating probabilities along states in stack.");
+                                updateProbabilitiesUsingStack(stateActionStack, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker);
+                            }
+
+                            // If the state was neither a trivial (non-accepting) BMEC nor a target state, we need to store
+                            // its behavior.
+                            if (!foundTargetState) {
+                                // Next, we insert the behavior into our matrix structure.
+                                matrix.resize(matrix.size() + behavior.getNumberOfChoices());
+                                uint32_t currentAction = 0;
+                                for (auto const& choice : behavior) {
+                                    for (auto const& entry : choice) {
+                                        std::cout << "got " << currentStateId << " (row group " << stateToRowGroupMapping[currentStateId] << ") " << " -> " << entry.first << " with prob " << entry.second << std::endl;
+                                        matrix.back().emplace_back(entry.first, entry.second);
+                                    }
+
+                                    lowerBoundsPerAction.push_back(storm::utility::zero<ValueType>());
+                                    upperBoundsPerAction.push_back(storm::utility::one<ValueType>());
+
+                                    // Update the bounds for the explored states to initially let them have the correct value.
+                                    updateProbabilities(currentStateId, currentAction, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker);
+
+                                    ++currentAction;
+                                }
+                            }
+                        }
 
                         // Now that we have explored the state, we can dispose of it.
                         unexploredStates.erase(unexploredIt);
+
+                        // Terminate the row group.
+                        rowGroupIndices.push_back(matrix.size());
+                    } else {
+                        // If the state was explored before, but determined to be a terminal state of the exploration,
+                        // we need to determine this now.
+                        if (matrix[rowGroupIndices[stateToRowGroupMapping[currentStateId]]].empty()) {
+                            foundTargetState = true;
+                        }
+                    }
+
+                    if (!foundTargetState) {
+                        // At this point, we can be sure that the state was expanded and that we can sample according to the
+                        // probabilities in the matrix.
+                        uint32_t chosenAction = sampleFromMaxActions(currentStateId, matrix, rowGroupIndices, stateToRowGroupMapping, upperBoundsPerAction, unexploredMarker);
+                        stateActionStack.back().second = chosenAction;
+                        STORM_LOG_TRACE("Sampled action " << chosenAction << " in state " << currentStateId << ".");
+
+                        StateType successor = sampleSuccessorFromAction(currentStateId, matrix, rowGroupIndices, stateToRowGroupMapping);
+                        STORM_LOG_TRACE("Sampled successor " << successor << " according to action " << chosenAction << " of state " << currentStateId << ".");
+
+                        // Put the successor state and a dummy action on top of the stack.
+                        stateActionStack.emplace_back(successor, 0);
                     }
                 }
-                if (!foundTargetState) {
-                    // At this point, we can be sure that the state was expanded and that we can sample according to the
-                    // probabilities in the matrix.
- STORM_LOG_TRACE("Sampling action in state."); - uint32_t chosenAction = 0; + for (auto const& el : lowerBoundsPerState) { + std::cout << el << " - "; + } + std::cout << std::endl; - STORM_LOG_TRACE("Sampling successor state according to action " << chosenAction << "."); - break; - - // TODO: set action of topmost stack element - // TOOD: determine if end component (state) - + for (auto const& el : upperBoundsPerState) { + std::cout << el << " - "; } - } + std::cout << std::endl; + STORM_LOG_TRACE("Lower bound is " << lowerBoundsPerState[0] << "."); + STORM_LOG_TRACE("Upper bound is " << upperBoundsPerState[0] << "."); + ValueType difference = upperBoundsPerState[0] - lowerBoundsPerState[0]; + STORM_LOG_TRACE("Difference after iteration " << iteration << " is " << difference << "."); + convergenceCriterionMet = difference < 1e-6; + + ++iteration; + } + return nullptr; } diff --git a/src/modelchecker/reachability/SparseMdpLearningModelChecker.h b/src/modelchecker/reachability/SparseMdpLearningModelChecker.h index f63ce40a4..90e72d85a 100644 --- a/src/modelchecker/reachability/SparseMdpLearningModelChecker.h +++ b/src/modelchecker/reachability/SparseMdpLearningModelChecker.h @@ -1,6 +1,8 @@ #ifndef STORM_MODELCHECKER_REACHABILITY_SPARSEMDPLEARNINGMODELCHECKER_H_ #define STORM_MODELCHECKER_REACHABILITY_SPARSEMDPLEARNINGMODELCHECKER_H_ +#include + #include "src/modelchecker/AbstractModelChecker.h" #include "src/storage/prism/Program.h" @@ -25,15 +27,22 @@ namespace storm { virtual std::unique_ptr computeReachabilityProbabilities(CheckTask const& checkTask) override; private: - void updateProbabilities(StateType const& sourceStateId, uint32_t action, StateType const& targetStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBounds, std::vector& upperBounds) const; + void updateProbabilities(StateType const& sourceStateId, uint32_t action, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBounds, std::vector& upperBounds, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, StateType const& unexploredMarker) const; + + void updateProbabilitiesUsingStack(std::vector>& stateActionStack, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBounds, std::vector& upperBounds, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, StateType const& unexploredMarker) const; - void updateProbabilitiesUsingStack(std::vector>& stateActionStack, StateType const& currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBounds, std::vector& upperBounds) const; + uint32_t sampleFromMaxActions(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& upperBounds, StateType const& unexploredMarker); + + StateType sampleSuccessorFromAction(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping); // The program that defines the model to check. storm::prism::Program program; // The variable information. storm::generator::VariableInformation variableInformation; + + // The random number generator. + std::default_random_engine generator; }; } }