diff --git a/src/cli/entrypoints.h b/src/cli/entrypoints.h index d14f8848c..505ec0cd3 100644 --- a/src/cli/entrypoints.h +++ b/src/cli/entrypoints.h @@ -61,7 +61,7 @@ namespace storm { STORM_LOG_THROW(program.getModelType() == storm::prism::Program::ModelType::DTMC || program.getModelType() == storm::prism::Program::ModelType::MDP, storm::exceptions::InvalidSettingsException, "Currently learning-based verification is only available for DTMCs and MDPs."); std::cout << std::endl << "Model checking property: " << *formula << " ..."; storm::modelchecker::CheckTask task(*formula, onlyInitialStatesRelevant); - storm::modelchecker::SparseMdpLearningModelChecker checker(program); + storm::modelchecker::SparseMdpLearningModelChecker checker(program, storm::utility::prism::parseConstantDefinitionString(program, storm::settings::generalSettings().getConstantDefinitionString())); std::unique_ptr result; if (checker.canHandle(task)) { result = checker.check(task); diff --git a/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp b/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp index ed51aa0fa..aeac6776a 100644 --- a/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp +++ b/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp @@ -23,7 +23,8 @@ namespace storm { namespace modelchecker { template - SparseMdpLearningModelChecker::SparseMdpLearningModelChecker(storm::prism::Program const& program) : program(storm::utility::prism::preprocessProgram(program)), variableInformation(this->program), randomGenerator(std::chrono::system_clock::now().time_since_epoch().count()), comparator(1e-9) { + SparseMdpLearningModelChecker::SparseMdpLearningModelChecker(storm::prism::Program const& program, boost::optional> const& constantDefinitions) : program(storm::utility::prism::preprocessProgram(program, constantDefinitions)), variableInformation(this->program), //randomGenerator(std::chrono::system_clock::now().time_since_epoch().count()), + comparator(1e-9) { // Intentionally left empty. } @@ -37,91 +38,199 @@ namespace storm { template void SparseMdpLearningModelChecker::updateProbabilities(StateType const& sourceStateId, uint32_t action, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, StateType const& unexploredMarker) const { // Find out which row of the matrix we have to consider for the given action. - StateType sourceRowGroup = stateToRowGroupMapping[sourceStateId]; - StateType sourceRow = rowGroupIndices[sourceRowGroup] + action; + StateType sourceRow = action; // Compute the new lower/upper values of the action. ValueType newLowerValue = storm::utility::zero(); ValueType newUpperValue = storm::utility::zero(); - boost::optional loopProbability; + +// boost::optional loopProbability; for (auto const& element : transitionMatrix[sourceRow]) { // If the element is a self-loop, we treat the probability by a proper rescaling after the loop. - if (element.getColumn() == sourceStateId) { - STORM_LOG_TRACE("Found self-loop with probability " << element.getValue() << "."); - loopProbability = element.getValue(); - continue; - } +// if (element.getColumn() == sourceStateId) { +// STORM_LOG_TRACE("Found self-loop with probability " << element.getValue() << "."); +// loopProbability = element.getValue(); +// continue; +// } + std::cout << "lower += " << element.getValue() << " * state[" << element.getColumn() << "] = " << (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::zero() : lowerBoundsPerState[stateToRowGroupMapping[element.getColumn()]]) << std::endl; + if (stateToRowGroupMapping[element.getColumn()] != unexploredMarker) { + std::cout << "upper bounds per state @ " << stateToRowGroupMapping[element.getColumn()] << " is " << upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]] << std::endl; + } + std::cout << "upper += " << element.getValue() << " * state[" << element.getColumn() << "] = " << (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]) << std::endl; newLowerValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::zero() : lowerBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); newUpperValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + std::cout << "after iter " << newLowerValue << " and " << newUpperValue << std::endl; } // If there was a self-loop with probability p, we scale the probabilities by 1/(1-p). - if (loopProbability) { - STORM_LOG_TRACE("Scaling values " << newLowerValue << " and " << newUpperValue << " with " << (storm::utility::one()/(storm::utility::one() - loopProbability.get())) << "."); - newLowerValue = newLowerValue / (storm::utility::one() - loopProbability.get()); - newUpperValue = newUpperValue / (storm::utility::one() - loopProbability.get()); - } +// if (loopProbability) { +// STORM_LOG_TRACE("Scaling values " << newLowerValue << " and " << newUpperValue << " with " << (storm::utility::one()/(storm::utility::one() - loopProbability.get())) << "."); +// newLowerValue = newLowerValue / (storm::utility::one() - loopProbability.get()); +// newUpperValue = newUpperValue / (storm::utility::one() - loopProbability.get()); +// } // And set them as the current value. - lowerBoundsPerAction[rowGroupIndices[stateToRowGroupMapping[sourceStateId]] + action] = newLowerValue; - STORM_LOG_TRACE("Updating lower value of action " << action << " of state " << sourceStateId << " to " << newLowerValue << "."); - upperBoundsPerAction[rowGroupIndices[stateToRowGroupMapping[sourceStateId]] + action] = newUpperValue; - STORM_LOG_TRACE("Updating upper value of action " << action << " of state " << sourceStateId << " to " << newUpperValue << "."); + std::cout << newLowerValue << " vs " << newUpperValue << std::endl; + STORM_LOG_ASSERT(newLowerValue <= newUpperValue, "Lower bound must always be smaller or equal than upper bound."); + STORM_LOG_TRACE("Updating lower value of action " << action << " of state " << sourceStateId << " to " << newLowerValue << " (was " << lowerBoundsPerAction[action] << ")."); + lowerBoundsPerAction[action] = newLowerValue; + STORM_LOG_TRACE("Updating upper value of action " << action << " of state " << sourceStateId << " to " << newUpperValue << " (was " << upperBoundsPerAction[action] << ")."); + upperBoundsPerAction[action] = newUpperValue; // Check if we need to update the values for the states. if (lowerBoundsPerState[stateToRowGroupMapping[sourceStateId]] < newLowerValue) { + STORM_LOG_TRACE("Got new lower bound for state " << sourceStateId << ": " << newLowerValue << " (was " << lowerBoundsPerState[stateToRowGroupMapping[sourceStateId]] << ")."); lowerBoundsPerState[stateToRowGroupMapping[sourceStateId]] = newLowerValue; - STORM_LOG_TRACE("Got new lower bound for state " << sourceStateId << ": " << newLowerValue << "."); } - if (upperBoundsPerState[stateToRowGroupMapping[sourceStateId]] > newUpperValue) { - upperBoundsPerState[stateToRowGroupMapping[sourceStateId]] = newUpperValue; - STORM_LOG_TRACE("Got new upper bound for state " << sourceStateId << ": " << newUpperValue << "."); + + uint32_t sourceRowGroup = stateToRowGroupMapping[sourceStateId]; + if (newUpperValue < upperBoundsPerState[sourceRowGroup]) { + if (rowGroupIndices[sourceRowGroup + 1] - rowGroupIndices[sourceRowGroup] > 1) { + ValueType max = storm::utility::zero(); + + for (uint32_t currentAction = rowGroupIndices[sourceRowGroup]; currentAction < rowGroupIndices[sourceRowGroup + 1]; ++currentAction) { + std::cout << "cur: " << currentAction << std::endl; + if (currentAction == action) { + continue; + } + + ValueType currentValue = storm::utility::zero(); + for (auto const& element : transitionMatrix[currentAction]) { + currentValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + } + max = std::max(max, currentValue); + std::cout << "max is " << max << std::endl; + } + + newUpperValue = std::max(newUpperValue, max); + } + + if (newUpperValue < upperBoundsPerState[sourceRowGroup]) { + STORM_LOG_TRACE("Got new upper bound for state " << sourceStateId << ": " << newUpperValue << " (was " << upperBoundsPerState[sourceRowGroup] << ")."); + std::cout << "writing at index " << sourceRowGroup << std::endl; + upperBoundsPerState[sourceRowGroup] = newUpperValue; + } } } template void SparseMdpLearningModelChecker::updateProbabilitiesUsingStack(std::vector>& stateActionStack, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, StateType const& unexploredMarker) const { - while (stateActionStack.size() > 1) { - stateActionStack.pop_back(); - + std::cout << "stack is:" << std::endl; + for (auto const& el : stateActionStack) { + std::cout << el.first << "-[" << el.second << "]-> "; + } + std::cout << std::endl; + + stateActionStack.pop_back(); + while (!stateActionStack.empty()) { updateProbabilities(stateActionStack.back().first, stateActionStack.back().second, transitionMatrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); + stateActionStack.pop_back(); + } + } + + template + std::pair SparseMdpLearningModelChecker::computeValuesOfChoice(uint32_t action, std::vector>> const& transitionMatrix, std::vector const& stateToRowGroupMapping, std::vector const& lowerBoundsPerState, std::vector const& upperBoundsPerState, StateType const& unexploredMarker) { + std::pair result = std::make_pair(storm::utility::zero(), storm::utility::zero()); + for (auto const& element : transitionMatrix[action]) { + result.first += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::zero() : lowerBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + result.second += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + } + return result; + } + + template + std::pair SparseMdpLearningModelChecker::computeValuesOfState(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector const& lowerBounds, std::vector const& upperBounds, std::vector const& lowerBoundsPerState, std::vector const& upperBoundsPerState, StateType const& unexploredMarker) { + StateType sourceRowGroup = stateToRowGroupMapping[currentStateId]; + std::pair result = std::make_pair(storm::utility::zero(), storm::utility::zero()); + for (uint32_t choice = rowGroupIndices[sourceRowGroup]; choice < rowGroupIndices[sourceRowGroup + 1]; ++choice) { + std::pair choiceValues = computeValuesOfChoice(choice, transitionMatrix, stateToRowGroupMapping, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); + result.first = std::max(choiceValues.first, result.first); + result.second = std::max(choiceValues.second, result.second); } + return result; } template - uint32_t SparseMdpLearningModelChecker::sampleFromMaxActions(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& upperBoundsPerState, StateType const& unexploredMarker) { + uint32_t SparseMdpLearningModelChecker::sampleFromMaxActions(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector const& upperBoundsPerState, std::unordered_map const& stateToLeavingChoicesOfEndComponent, StateType const& unexploredMarker) { StateType rowGroup = stateToRowGroupMapping[currentStateId]; - STORM_LOG_TRACE("Row group for action sampling is " << rowGroup << "."); // First, determine all maximizing actions. std::vector allMaxActions; + for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) { + if (stateToRowGroupMapping[state] != unexploredMarker) { + std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [x, " << upperBoundsPerState[stateToRowGroupMapping[state]] << "]" << std::endl; + } else { + std::cout << "state " << state << " is unexplored" << std::endl; + } + } + // Determine the maximal value of any action. ValueType max = 0; - for (uint32_t row = rowGroupIndices[rowGroup]; row < rowGroupIndices[rowGroup + 1]; ++row) { - ValueType current = 0; - for (auto const& element : transitionMatrix[row]) { - current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + auto choicesInEcIt = stateToLeavingChoicesOfEndComponent.find(currentStateId); + if (choicesInEcIt != stateToLeavingChoicesOfEndComponent.end()) { + STORM_LOG_TRACE("Sampling from actions leaving the previously detected EC."); + for (auto const& row : *choicesInEcIt->second) { + ValueType current = 0; + for (auto const& element : transitionMatrix[row]) { + current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + } + + max = std::max(max, current); + } + } else { + STORM_LOG_TRACE("Sampling from actions leaving the state."); + for (uint32_t row = rowGroupIndices[rowGroup]; row < rowGroupIndices[rowGroup + 1]; ++row) { + ValueType current = 0; + for (auto const& element : transitionMatrix[row]) { + current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + } + max = std::max(max, current); } - - max = std::max(max, current); } STORM_LOG_TRACE("Looking for action with value " << max << "."); - for (uint32_t row = rowGroupIndices[rowGroup]; row < rowGroupIndices[rowGroup + 1]; ++row) { - ValueType current = 0; - for (auto const& element : transitionMatrix[row]) { - current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) { + if (stateToRowGroupMapping[state] != unexploredMarker) { + std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [x, " << upperBoundsPerState[stateToRowGroupMapping[state]] << "]" << std::endl; + } else { + std::cout << "state " << state << " is unexplored" << std::endl; } - STORM_LOG_TRACE("Computed (upper) bound " << current << " for row " << row << "."); - - // If the action is one of the maximizing ones, insert it into our list. - if (comparator.isEqual(current, max)) { - allMaxActions.push_back(row); + } + + if (choicesInEcIt != stateToLeavingChoicesOfEndComponent.end()) { + for (auto const& row : *choicesInEcIt->second) { + ValueType current = 0; + for (auto const& element : transitionMatrix[row]) { + current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + } + STORM_LOG_TRACE("Computed (upper) bound " << current << " for row " << row << "."); + + // If the action is one of the maximizing ones, insert it into our list. + if (comparator.isEqual(current, max)) { + allMaxActions.push_back(row); + } + } + } else { + for (uint32_t row = rowGroupIndices[rowGroup]; row < rowGroupIndices[rowGroup + 1]; ++row) { + ValueType current = 0; + for (auto const& element : transitionMatrix[row]) { + if (stateToRowGroupMapping[element.getColumn()] != unexploredMarker) { + std::cout << "upper bounds per state @ " << stateToRowGroupMapping[element.getColumn()] << " is " << upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]] << std::endl; + } + std::cout << "+= " << element.getValue() << " * " << "state[" << element.getColumn() << "] = " << (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]) << std::endl; + current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]); + } + STORM_LOG_TRACE("Computed (upper) bound " << current << " for row " << row << "."); + + // If the action is one of the maximizing ones, insert it into our list. + if (comparator.isEqual(current, max)) { + allMaxActions.push_back(row); + } } } @@ -129,12 +238,12 @@ namespace storm { // Now sample from all maximizing actions. std::uniform_int_distribution distribution(0, allMaxActions.size() - 1); - return allMaxActions[distribution(randomGenerator)] - rowGroupIndices[rowGroup]; + return allMaxActions[distribution(randomGenerator)]; } template - typename SparseMdpLearningModelChecker::StateType SparseMdpLearningModelChecker::sampleSuccessorFromAction(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping) { - uint32_t row = rowGroupIndices[stateToRowGroupMapping[currentStateId]]; + typename SparseMdpLearningModelChecker::StateType SparseMdpLearningModelChecker::sampleSuccessorFromAction(StateType chosenAction, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping) { + uint32_t row = chosenAction; // TODO: precompute this? std::vector probabilities(transitionMatrix[row].size()); @@ -143,7 +252,6 @@ namespace storm { // Now sample according to the probabilities. std::discrete_distribution distribution(probabilities.begin(), probabilities.end()); StateType offset = distribution(randomGenerator); - STORM_LOG_TRACE("Sampled " << offset << " from " << probabilities.size() << " elements."); return transitionMatrix[row][offset].getColumn(); } @@ -159,7 +267,7 @@ namespace storm { } template - void SparseMdpLearningModelChecker::detectEndComponents(std::vector> const& stateActionStack, boost::container::flat_set const& targetStates, std::vector>>& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, std::unordered_map& unexploredStates, StateType const& unexploredMarker) const { + void SparseMdpLearningModelChecker::detectEndComponents(std::vector> const& stateActionStack, boost::container::flat_set& terminalStates, std::vector>>& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, std::unordered_map& stateToLeavingChoicesOfEndComponent, StateType const& unexploredMarker) const { STORM_LOG_TRACE("Starting EC detection."); @@ -170,19 +278,26 @@ namespace storm { // Start with 1. storm::storage::SparseMatrixBuilder builder(0, 0, 0, false, true, 0); + + // Determine the set of states that was expanded. std::vector relevantStates; for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) { if (stateToRowGroupMapping[state] != unexploredMarker) { relevantStates.push_back(state); } } + // Sort according to the actual row groups so we can insert the elements in order later. + std::sort(relevantStates.begin(), relevantStates.end(), [&stateToRowGroupMapping] (StateType const& a, StateType const& b) { return stateToRowGroupMapping[a] < stateToRowGroupMapping[b]; }); StateType unexploredState = relevantStates.size(); + // Create a mapping for faster look-up during the translation of flexible matrix to the real sparse matrix. std::unordered_map relevantStateToNewRowGroupMapping; for (StateType index = 0; index < relevantStates.size(); ++index) { + std::cout << "relevant: " << relevantStates[index] << std::endl; relevantStateToNewRowGroupMapping.emplace(relevantStates[index], index); } + // Do the actual translation. StateType currentRow = 0; for (auto const& state : relevantStates) { builder.newRowGroup(currentRow); @@ -205,7 +320,7 @@ namespace storm { ++currentRow; } } - // Finally, make the unexpanded state absorbing. + // Then, make the unexpanded state absorbing. builder.newRowGroup(currentRow); builder.addNextValue(currentRow, unexploredState, storm::utility::one()); STORM_LOG_TRACE("Successfully built matrix for MEC decomposition."); @@ -216,35 +331,76 @@ namespace storm { STORM_LOG_TRACE("Successfully computed MEC decomposition. Found " << (mecDecomposition.size() > 1 ? (mecDecomposition.size() - 1) : 0) << " MEC(s)."); // 3. Analyze the MEC decomposition. - std::unordered_map> stateToOutgoingChoices; for (auto const& mec : mecDecomposition) { // Ignore the (expected) MEC of the unexplored state. if (mec.containsState(unexploredState)) { continue; } - // Now we delete all choices that belong to the MEC. + bool containsTargetState = false; + + // Now we record all choices leaving the EC. + ChoiceSetPointer leavingChoices = std::make_shared(); for (auto const& stateAndChoices : mec) { // Compute the state of the original model that corresponds to the current state. - StateType originalState = stateToRowGroupMapping[relevantStates[relevantStateToNewRowGroupMapping.at(stateAndChoices.first)]]; + std::cout << "local state: " << stateAndChoices.first << std::endl; + StateType originalState = relevantStates[stateAndChoices.first]; + std::cout << "original state: " << originalState << std::endl; + uint32_t originalRowGroup = stateToRowGroupMapping[originalState]; + std::cout << "original row group: " << originalRowGroup << std::endl; + + // TODO: This check for a target state is a bit hackish and only works for max probabilities. + if (!containsTargetState && lowerBoundsPerState[originalRowGroup] == storm::utility::one()) { + containsTargetState = true; + } auto includedChoicesIt = stateAndChoices.second.begin(); auto includedChoicesIte = stateAndChoices.second.end(); - // Now iterate through all the choices and delete the ones included in the MEC and record outgoing - // choices (to make them available in the other states of the EC). - for (auto choice = rowGroupIndices[originalState]; choice < rowGroupIndices[originalState + 1]; ++choice) { - if (includedChoicesIt != includedChoicesIte && choice - rowGroupIndices[originalState] == *includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first]) { - transitionMatrix[choice].clear(); - transitionMatrix[choice].shrink_to_fit(); + for (auto choice = rowGroupIndices[originalRowGroup]; choice < rowGroupIndices[originalRowGroup + 1]; ++choice) { + if (includedChoicesIt != includedChoicesIte) { + STORM_LOG_TRACE("Next (local) choice contained in MEC is " << (*includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first])); + STORM_LOG_TRACE("Current (local) choice iterated is " << (choice - rowGroupIndices[originalRowGroup])); + if (choice - rowGroupIndices[originalRowGroup] != *includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first]) { + STORM_LOG_TRACE("Choice leaves the EC."); + leavingChoices->insert(choice); + } else { + STORM_LOG_TRACE("Choice stays in the EC."); + ++includedChoicesIt; + } } else { - + STORM_LOG_TRACE("Choice leaves the EC, because there is no more choice staying in the EC."); + leavingChoices->insert(choice); } } + + stateToLeavingChoicesOfEndComponent[originalState] = leavingChoices; + } + + // If one of the states of the EC is a target state, all states in the EC have probability 1. + if (containsTargetState) { + STORM_LOG_TRACE("MEC contains a target state."); + for (auto const& stateAndChoices : mec) { + // Compute the state of the original model that corresponds to the current state. + StateType originalState = relevantStates[stateAndChoices.first]; + + STORM_LOG_TRACE("Setting lower bound of state in row group " << stateToRowGroupMapping[originalState] << " to 1."); + lowerBoundsPerState[stateToRowGroupMapping[originalState]] = storm::utility::one(); + terminalStates.insert(originalState); + } + } else if (leavingChoices->empty()) { + STORM_LOG_TRACE("MEC's leaving choices are empty."); + // If there is no choice leaving the EC, but it contains no target state, all states have probability 0. + for (auto const& stateAndChoices : mec) { + // Compute the state of the original model that corresponds to the current state. + StateType originalState = relevantStates[stateAndChoices.first]; + + STORM_LOG_TRACE("Setting upper bound of state in row group " << stateToRowGroupMapping[originalState] << " to 0."); + upperBoundsPerState[stateToRowGroupMapping[originalState]] = storm::utility::zero(); + terminalStates.insert(originalState); + } } } - - exit(-1); } template @@ -255,8 +411,8 @@ namespace storm { STORM_LOG_THROW(stateStorage.initialStateIndices.size() == 1, storm::exceptions::NotSupportedException, "Currently only models with one initial state are supported by the learning engine."); StateType initialStateIndex = stateStorage.initialStateIndices.front(); - // A set storing all target states. - boost::container::flat_set targetStates; + // A set storing all states in which to terminate the search. + boost::container::flat_set terminalStates; // Vectors to store the lower/upper bounds for each action (in each state). std::vector lowerBoundsPerAction; @@ -264,11 +420,16 @@ namespace storm { std::vector lowerBoundsPerState; std::vector upperBoundsPerState; + // Since we might run into end-components, we track a mapping from states in ECs to all leaving choices of + // that EC. + std::unordered_map stateToLeavingChoicesOfEndComponent; + // Now perform the actual sampling. std::vector> stateActionStack; std::size_t iterations = 0; std::size_t maxPathLength = 0; + std::size_t numberOfTargetStates = 0; std::size_t pathLengthUntilEndComponentDetection = 27; bool convergenceCriterionMet = false; while (!convergenceCriterionMet) { @@ -300,8 +461,8 @@ namespace storm { // does not need to be expanded. generator.load(currentState); if (generator.satisfies(targetStateExpression)) { - STORM_LOG_TRACE("State does not need to be expanded, because it is a target state."); - targetStates.insert(currentStateId); + STORM_LOG_TRACE("State does not need to be expanded, because it is a target state. +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); + ++numberOfTargetStates; foundTargetState = true; foundTerminalState = true; } else { @@ -330,25 +491,40 @@ namespace storm { // Next, we insert the behavior into our matrix structure. StateType startRow = matrix.size(); matrix.resize(startRow + behavior.getNumberOfChoices()); + + // Terminate the row group. + rowGroupIndices.push_back(matrix.size()); + uint32_t currentAction = 0; for (auto const& choice : behavior) { for (auto const& entry : choice) { + std::cout << "adding " << currentStateId << " -> " << entry.first << " with prob " << entry.second << std::endl; matrix[startRow + currentAction].emplace_back(entry.first, entry.second); } lowerBoundsPerAction.push_back(storm::utility::zero()); upperBoundsPerAction.push_back(storm::utility::one()); - // Update the bounds for the explored states to initially let them have the correct value. - updateProbabilities(currentStateId, currentAction, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); + std::pair values = computeValuesOfChoice(startRow + currentAction, matrix, stateToRowGroupMapping, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); + lowerBoundsPerAction.back() = values.first; + upperBoundsPerAction.back() = values.second; + + STORM_LOG_TRACE("Initializing bounds of action " << (startRow + currentAction) << " to " << lowerBoundsPerAction.back() << " and " << upperBoundsPerAction.back() << "."); ++currentAction; } + + std::pair values = computeValuesOfState(currentStateId, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); + lowerBoundsPerState.back() = values.first; + upperBoundsPerState.back() = values.second; + + STORM_LOG_TRACE("Initializing bounds of state " << currentStateId << " to " << lowerBoundsPerState.back() << " and " << upperBoundsPerState.back() << "."); } } if (foundTerminalState) { STORM_LOG_TRACE("State does not need to be explored, because it is " << (foundTargetState ? "a target state" : "a rejecting terminal state") << "."); + terminalStates.insert(currentStateId); if (foundTargetState) { lowerBoundsPerState.back() = storm::utility::one(); @@ -363,50 +539,90 @@ namespace storm { // Increase the size of the matrix, but leave the row empty. matrix.resize(matrix.size() + 1); - STORM_LOG_TRACE("Updating probabilities along states in stack."); - updateProbabilitiesUsingStack(stateActionStack, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); + // Terminate the row group. + rowGroupIndices.push_back(matrix.size()); } // Now that we have explored the state, we can dispose of it. unexploredStates.erase(unexploredIt); - - // Terminate the row group. - rowGroupIndices.push_back(matrix.size()); } else { - // If the state was explored before, but determined to be a terminal state of the exploration, - // we need to determine this now. - if (matrix[rowGroupIndices[stateToRowGroupMapping[currentStateId]]].empty()) { + if (terminalStates.find(currentStateId) != terminalStates.end()) { + STORM_LOG_TRACE("Found already explored terminal state: " << currentStateId << "."); foundTerminalState = true; - - // Update the bounds along the path to the terminal state. - updateProbabilitiesUsingStack(stateActionStack, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); } } - if (!foundTerminalState) { + if (foundTerminalState) { + // Update the bounds along the path to the terminal state. + STORM_LOG_TRACE("Found terminal state, updating probabilities along path."); + updateProbabilitiesUsingStack(stateActionStack, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker); + } else { + std::cout << "(2) stack is:" << std::endl; + for (auto const& el : stateActionStack) { + std::cout << el.first << "-[" << el.second << "]-> "; + } + std::cout << std::endl; + + for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) { + if (stateToRowGroupMapping[state] != unexploredMarker) { + std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [" << lowerBoundsPerState[stateToRowGroupMapping[state]] << ", " << upperBoundsPerState[stateToRowGroupMapping[state]] << "], actions: "; + for (auto choice = rowGroupIndices[stateToRowGroupMapping[state]]; choice < rowGroupIndices[stateToRowGroupMapping[state] + 1]; ++choice) { + std::cout << choice << " = [" << lowerBoundsPerAction[choice] << ", " << upperBoundsPerAction[choice] << "], "; + } + std::cout << std::endl; + } else { + std::cout << "state " << state << " is unexplored" << std::endl; + } + } + // At this point, we can be sure that the state was expanded and that we can sample according to the // probabilities in the matrix. - uint32_t chosenAction = sampleFromMaxActions(currentStateId, matrix, rowGroupIndices, stateToRowGroupMapping, upperBoundsPerAction, unexploredMarker); + uint32_t chosenAction = sampleFromMaxActions(currentStateId, matrix, rowGroupIndices, stateToRowGroupMapping, upperBoundsPerState, stateToLeavingChoicesOfEndComponent, unexploredMarker); stateActionStack.back().second = chosenAction; STORM_LOG_TRACE("Sampled action " << chosenAction << " in state " << currentStateId << "."); - StateType successor = sampleSuccessorFromAction(currentStateId, matrix, rowGroupIndices, stateToRowGroupMapping); + StateType successor = sampleSuccessorFromAction(chosenAction, matrix, rowGroupIndices, stateToRowGroupMapping); STORM_LOG_TRACE("Sampled successor " << successor << " according to action " << chosenAction << " of state " << currentStateId << "."); // Put the successor state and a dummy action on top of the stack. stateActionStack.emplace_back(successor, 0); maxPathLength = std::max(maxPathLength, stateActionStack.size()); - // If the current path length exceeds the threshold, we perform an EC detection. - if (stateActionStack.size() > pathLengthUntilEndComponentDetection) { - detectEndComponents(stateActionStack, targetStates, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredStates, unexploredMarker); + // If the current path length exceeds the threshold and the model is a nondeterministic one, we + // perform an EC detection. + if (stateActionStack.size() > pathLengthUntilEndComponentDetection && !program.isDeterministicModel()) { + detectEndComponents(stateActionStack, terminalStates, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, stateToLeavingChoicesOfEndComponent, unexploredMarker); + + // Abort the current search. + STORM_LOG_TRACE("Aborting the search after EC detection."); + stateActionStack.clear(); + break; } } } + // Sanity check of results. + for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) { + if (stateToRowGroupMapping[state] != unexploredMarker) { + STORM_LOG_ASSERT(lowerBoundsPerState[stateToRowGroupMapping[state]] <= upperBoundsPerState[stateToRowGroupMapping[state]], "The bounds for state " << state << " are not in a sane relation: " << lowerBoundsPerState[stateToRowGroupMapping[state]] << " > " << upperBoundsPerState[stateToRowGroupMapping[state]] << "."); + } + } + + for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) { + if (stateToRowGroupMapping[state] != unexploredMarker) { + std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [" << lowerBoundsPerState[stateToRowGroupMapping[state]] << ", " << upperBoundsPerState[stateToRowGroupMapping[state]] << "], actions: "; + for (auto choice = rowGroupIndices[stateToRowGroupMapping[state]]; choice < rowGroupIndices[stateToRowGroupMapping[state] + 1]; ++choice) { + std::cout << choice << " = [" << lowerBoundsPerAction[choice] << ", " << upperBoundsPerAction[choice] << "], "; + } + std::cout << std::endl; + } else { + std::cout << "state " << state << " is unexplored" << std::endl; + } + } + + STORM_LOG_DEBUG("Discovered states: " << stateStorage.numberOfStates << " (" << unexploredStates.size() << " unexplored)."); - STORM_LOG_DEBUG("Lower bound is " << lowerBoundsPerState[initialStateIndex] << "."); - STORM_LOG_DEBUG("Upper bound is " << upperBoundsPerState[initialStateIndex] << "."); + STORM_LOG_DEBUG("Value of initial state is in [" << lowerBoundsPerState[initialStateIndex] << ", " << upperBoundsPerState[initialStateIndex] << "]."); ValueType difference = upperBoundsPerState[initialStateIndex] - lowerBoundsPerState[initialStateIndex]; STORM_LOG_DEBUG("Difference after iteration " << iterations << " is " << difference << "."); convergenceCriterionMet = difference < 1e-6; @@ -416,7 +632,7 @@ namespace storm { if (storm::settings::generalSettings().isShowStatisticsSet()) { std::cout << std::endl << "Learning summary -------------------------" << std::endl; - std::cout << "Discovered states: " << stateStorage.numberOfStates << " (" << unexploredStates.size() << " unexplored)" << std::endl; + std::cout << "Discovered states: " << stateStorage.numberOfStates << " (" << unexploredStates.size() << " unexplored, " << numberOfTargetStates << " target states)" << std::endl; std::cout << "Sampling iterations: " << iterations << std::endl; std::cout << "Maximal path length: " << maxPathLength << std::endl; } diff --git a/src/modelchecker/reachability/SparseMdpLearningModelChecker.h b/src/modelchecker/reachability/SparseMdpLearningModelChecker.h index 9d4820b51..0b482f57e 100644 --- a/src/modelchecker/reachability/SparseMdpLearningModelChecker.h +++ b/src/modelchecker/reachability/SparseMdpLearningModelChecker.h @@ -32,8 +32,10 @@ namespace storm { class SparseMdpLearningModelChecker : public AbstractModelChecker { public: typedef uint32_t StateType; + typedef boost::container::flat_set ChoiceSet; + typedef std::shared_ptr ChoiceSetPointer; - SparseMdpLearningModelChecker(storm::prism::Program const& program); + SparseMdpLearningModelChecker(storm::prism::Program const& program, boost::optional> const& constantDefinitions); virtual bool canHandle(CheckTask const& checkTask) const override; @@ -46,11 +48,15 @@ namespace storm { void updateProbabilitiesUsingStack(std::vector>& stateActionStack, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBounds, std::vector& upperBounds, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, StateType const& unexploredMarker) const; - uint32_t sampleFromMaxActions(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& upperBounds, StateType const& unexploredMarker); + uint32_t sampleFromMaxActions(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector const& upperBoundsPerState, std::unordered_map const& stateToLeavingChoicesOfEndComponent, StateType const& unexploredMarker); - StateType sampleSuccessorFromAction(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping); + StateType sampleSuccessorFromAction(StateType chosenAction, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping); - void detectEndComponents(std::vector> const& stateActionStack, boost::container::flat_set const& targetStates, std::vector>>& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, std::unordered_map& unexploredStates, StateType const& unexploredMarker) const; + void detectEndComponents(std::vector> const& stateActionStack, boost::container::flat_set& terminalStates, std::vector>>& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector& lowerBoundsPerAction, std::vector& upperBoundsPerAction, std::vector& lowerBoundsPerState, std::vector& upperBoundsPerState, std::unordered_map& stateToLeavingChoicesOfEndComponent, StateType const& unexploredMarker) const; + + std::pair computeValuesOfChoice(uint32_t action, std::vector>> const& transitionMatrix, std::vector const& stateToRowGroupMapping, std::vector const& lowerBoundsPerState, std::vector const& upperBoundsPerState, StateType const& unexploredMarker); + + std::pair computeValuesOfState(StateType currentStateId, std::vector>> const& transitionMatrix, std::vector const& rowGroupIndices, std::vector const& stateToRowGroupMapping, std::vector const& lowerBounds, std::vector const& upperBounds, std::vector const& lowerBoundsPerState, std::vector const& upperBoundsPerState, StateType const& unexploredMarker); storm::expressions::Expression getTargetStateExpression(storm::logic::Formula const& subformula); diff --git a/src/storage/MaximalEndComponentDecomposition.cpp b/src/storage/MaximalEndComponentDecomposition.cpp index 87c4bc509..0b9995c08 100644 --- a/src/storage/MaximalEndComponentDecomposition.cpp +++ b/src/storage/MaximalEndComponentDecomposition.cpp @@ -98,7 +98,7 @@ namespace storm { for (uint_fast64_t choice = nondeterministicChoiceIndices[state]; choice < nondeterministicChoiceIndices[state + 1]; ++choice) { bool choiceContainedInMEC = true; for (auto const& entry : transitionMatrix.getRow(choice)) { - if (entry.getValue() != storm::utility::zero()) { + if (entry.getValue() == storm::utility::zero()) { continue; } @@ -179,6 +179,7 @@ namespace storm { } } + STORM_LOG_ASSERT(!containedChoices.empty(), "The contained choices of any state in an MEC must be non-empty."); newMec.addState(state, std::move(containedChoices)); }