started refactoring learning model checker

Former-commit-id: b9e6015ae4
10 years ago · 5092435329
2 changed files with 621 additions and 505 deletions
--- a/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp
+++ b/src/modelchecker/reachability/SparseMdpLearningModelChecker.cpp
@ -1,11 +1,8 @@
 #include "src/modelchecker/reachability/SparseMdpLearningModelChecker.h"

 #include "src/storage/SparseMatrix.h"
-#include "src/storage/sparse/StateStorage.h"
 #include "src/storage/MaximalEndComponentDecomposition.h"

-#include "src/generator/PrismNextStateGenerator.h"
-
 #include "src/logic/FragmentSpecification.h"

 #include "src/utility/prism.h"
@ -18,6 +15,7 @@
 #include "src/settings/modules/GeneralSettings.h"

 #include "src/utility/macros.h"
+#include "src/exceptions/InvalidPropertyException.h"
 #include "src/exceptions/NotSupportedException.h"

 namespace storm {
@ -36,239 +34,297 @@ namespace storm {
        }
        
        template<typename ValueType>
-        void SparseMdpLearningModelChecker<ValueType>::updateProbabilities(StateType const& sourceStateId, uint32_t action, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType>& lowerBoundsPerAction, std::vector<ValueType>& upperBoundsPerAction, std::vector<ValueType>& lowerBoundsPerState, std::vector<ValueType>& upperBoundsPerState, StateType const& unexploredMarker) const {
-            // Find out which row of the matrix we have to consider for the given action.
-            StateType sourceRow = action;
+        std::unique_ptr<CheckResult> SparseMdpLearningModelChecker<ValueType>::computeReachabilityProbabilities(CheckTask<storm::logic::EventuallyFormula> const& checkTask) {
+            storm::logic::EventuallyFormula const& eventuallyFormula = checkTask.getFormula();
+            storm::logic::Formula const& subformula = eventuallyFormula.getSubformula();
+            STORM_LOG_THROW(program.isDeterministicModel() || checkTask.isOptimizationDirectionSet(), storm::exceptions::InvalidPropertyException, "For nondeterministic systems, an optimization direction (min/max) must be given in the property.");
+            STORM_LOG_THROW(subformula.isAtomicExpressionFormula() || subformula.isAtomicLabelFormula(), storm::exceptions::NotSupportedException, "Learning engine can only deal with formulas of the form 'F \"label\"' or 'F expression'.");
            
-            // Compute the new lower/upper values of the action.
-            ValueType newLowerValue = storm::utility::zero<ValueType>();
-            ValueType newUpperValue = storm::utility::zero<ValueType>();
-            
-//            boost::optional<ValueType> loopProbability;
-            for (auto const& element : transitionMatrix[sourceRow]) {
-                // If the element is a self-loop, we treat the probability by a proper rescaling after the loop.
-//                if (element.getColumn() == sourceStateId) {
-//                    STORM_LOG_TRACE("Found self-loop with probability " << element.getValue() << ".");
-//                    loopProbability = element.getValue();
-//                    continue;
-//                }
-                
-                std::cout << "lower += " << element.getValue() << " * state[" << element.getColumn() << "] = " << (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::zero<ValueType>() : lowerBoundsPerState[stateToRowGroupMapping[element.getColumn()]]) << std::endl;
-                if (stateToRowGroupMapping[element.getColumn()] != unexploredMarker) {
-                    std::cout << "upper bounds per state @ " << stateToRowGroupMapping[element.getColumn()] << " is " << upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]] << std::endl;
-                }
-                std::cout << "upper += " << element.getValue() << " * state[" << element.getColumn() << "] = " << (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]) << std::endl;
-                newLowerValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::zero<ValueType>() : lowerBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
-                newUpperValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
-                std::cout << "after iter " << newLowerValue << " and " << newUpperValue << std::endl;
-            }
-            
-            // If there was a self-loop with probability p, we scale the probabilities by 1/(1-p).
-//            if (loopProbability) {
-//                STORM_LOG_TRACE("Scaling values " << newLowerValue << " and " << newUpperValue << " with " << (storm::utility::one<ValueType>()/(storm::utility::one<ValueType>() - loopProbability.get())) << ".");
-//                newLowerValue = newLowerValue / (storm::utility::one<ValueType>() - loopProbability.get());
-//                newUpperValue = newUpperValue / (storm::utility::one<ValueType>() - loopProbability.get());
-//            }
+            StateGeneration stateGeneration(storm::generator::PrismNextStateGenerator<ValueType, StateType>(program, variableInformation, false), getTargetStateExpression(subformula));
            
-            // And set them as the current value.
-            std::cout << newLowerValue << " vs " << newUpperValue << std::endl;
-            STORM_LOG_ASSERT(newLowerValue <= newUpperValue, "Lower bound must always be smaller or equal than upper bound.");
-            STORM_LOG_TRACE("Updating lower value of action " << action << " of state " << sourceStateId << " to " << newLowerValue << " (was " << lowerBoundsPerAction[action] << ").");
-            lowerBoundsPerAction[action] = newLowerValue;
-            STORM_LOG_TRACE("Updating upper value of action " << action << " of state " << sourceStateId << " to " << newUpperValue << " (was " << upperBoundsPerAction[action] << ").");
-            upperBoundsPerAction[action] = newUpperValue;
+            ExplorationInformation explorationInformation(variableInformation.getTotalBitOffset(true));
+            explorationInformation.optimizationDirection = checkTask.isOptimizationDirectionSet() ? checkTask.getOptimizationDirection() : storm::OptimizationDirection::Maximize;
            
-            // Check if we need to update the values for the states.
-            if (lowerBoundsPerState[stateToRowGroupMapping[sourceStateId]] < newLowerValue) {
-                STORM_LOG_TRACE("Got new lower bound for state " << sourceStateId << ": " << newLowerValue << " (was " << lowerBoundsPerState[stateToRowGroupMapping[sourceStateId]] << ").");
-                lowerBoundsPerState[stateToRowGroupMapping[sourceStateId]] = newLowerValue;
-            }
+            // The first row group starts at action 0.
+            explorationInformation.newRowGroup(0);
            
-            uint32_t sourceRowGroup = stateToRowGroupMapping[sourceStateId];
-            if (newUpperValue < upperBoundsPerState[sourceRowGroup]) {
-                if (rowGroupIndices[sourceRowGroup + 1] - rowGroupIndices[sourceRowGroup] > 1) {
-                    ValueType max = storm::utility::zero<ValueType>();
+            // Create a callback for the next-state generator to enable it to request the index of states.
+            std::function<StateType (storm::generator::CompressedState const&)> stateToIdCallback = createStateToIdCallback(explorationInformation);
            
-                    for (uint32_t currentAction = rowGroupIndices[sourceRowGroup]; currentAction < rowGroupIndices[sourceRowGroup + 1]; ++currentAction) {
-                        std::cout << "cur: " << currentAction << std::endl;
-                        if (currentAction == action) {
-                            continue;
+            // Compute and return result.
+            std::tuple<StateType, ValueType, ValueType> boundsForInitialState = performLearningProcedure(stateGeneration, explorationInformation);
+            return std::make_unique<ExplicitQuantitativeCheckResult<ValueType>>(std::get<0>(boundsForInitialState), std::get<1>(boundsForInitialState));
        }
        
-                        ValueType currentValue = storm::utility::zero<ValueType>();
-                        for (auto const& element : transitionMatrix[currentAction]) {
-                            currentValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
+        template<typename ValueType>
+        storm::expressions::Expression SparseMdpLearningModelChecker<ValueType>::getTargetStateExpression(storm::logic::Formula const& subformula) const {
+            storm::expressions::Expression result;
+            if (subformula.isAtomicExpressionFormula()) {
+                result = subformula.asAtomicExpressionFormula().getExpression();
+            } else {
+                result = program.getLabelExpression(subformula.asAtomicLabelFormula().getLabel());
            }
-                        max = std::max(max, currentValue);
-                        std::cout << "max is " << max << std::endl;
+            return result;
        }
        
-                    newUpperValue = std::max(newUpperValue, max);
-                }
+        template<typename ValueType>
+        std::function<typename SparseMdpLearningModelChecker<ValueType>::StateType (storm::generator::CompressedState const&)> SparseMdpLearningModelChecker<ValueType>::createStateToIdCallback(ExplorationInformation& explorationInformation) const {
+            return [&explorationInformation] (storm::generator::CompressedState const& state) -> StateType {
+                StateType newIndex = explorationInformation.stateStorage.numberOfStates;
                
-                if (newUpperValue < upperBoundsPerState[sourceRowGroup]) {
-                    STORM_LOG_TRACE("Got new upper bound for state " << sourceStateId << ": " << newUpperValue << " (was " << upperBoundsPerState[sourceRowGroup] << ").");
-                    std::cout << "writing at index " << sourceRowGroup << std::endl;
-                    upperBoundsPerState[sourceRowGroup] = newUpperValue;
-                }
+                // Check, if the state was already registered.
+                std::pair<uint32_t, std::size_t> actualIndexBucketPair = explorationInformation.stateStorage.stateToId.findOrAddAndGetBucket(state, newIndex);
+                
+                if (actualIndexBucketPair.first == newIndex) {
+                    explorationInformation.addUnexploredState(state);
                }
+                
+                return actualIndexBucketPair.first;
+            };
        }
        
        template<typename ValueType>
-        void SparseMdpLearningModelChecker<ValueType>::updateProbabilitiesUsingStack(std::vector<std::pair<StateType, uint32_t>>& stateActionStack, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType>& lowerBoundsPerAction, std::vector<ValueType>& upperBoundsPerAction, std::vector<ValueType>& lowerBoundsPerState, std::vector<ValueType>& upperBoundsPerState, StateType const& unexploredMarker) const {
+        std::tuple<typename SparseMdpLearningModelChecker<ValueType>::StateType, ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::performLearningProcedure(StateGeneration& stateGeneration, ExplorationInformation& explorationInformation) const {
            
-            std::cout << "stack is:" << std::endl;
-            for (auto const& el : stateActionStack) {
-                std::cout << el.first << "-[" << el.second << "]-> ";
-            }
-            std::cout << std::endl;
+            // Generate the initial state so we know where to start the simulation.
+            explorationInformation.setInitialStates(stateGeneration.getInitialStates());
+            STORM_LOG_THROW(explorationInformation.getNumberOfInitialStates() == 1, storm::exceptions::NotSupportedException, "Currently only models with one initial state are supported by the learning engine.");
+            StateType initialStateIndex = explorationInformation.getFirstInitialState();
            
-            stateActionStack.pop_back();
-            while (!stateActionStack.empty()) {
-                updateProbabilities(stateActionStack.back().first, stateActionStack.back().second, transitionMatrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker);
-                stateActionStack.pop_back();
-            }
-        }
+            // Create a structure that holds the bounds for the states and actions.
+            BoundValues bounds;

-        template<typename ValueType>
-        std::pair<ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::computeValuesOfChoice(uint32_t action, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType> const& lowerBoundsPerState, std::vector<ValueType> const& upperBoundsPerState, StateType const& unexploredMarker) {
-            std::pair<ValueType, ValueType> result = std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>());
-            for (auto const& element : transitionMatrix[action]) {
-                result.first += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::zero<ValueType>() : lowerBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
-                result.second += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
+            // Create a stack that is used to track the path we sampled.
+            StateActionStack stack;
+            
+            // Now perform the actual sampling.
+            Statistics stats;
+            bool convergenceCriterionMet = false;
+            while (!convergenceCriterionMet) {
+                bool result = samplePathFromState(stateGeneration, explorationInformation, stack, bounds, stats);
+                
+                // If a terminal state was found, we update the probabilities along the path contained in the stack.
+                if (result) {
+                    // Update the bounds along the path to the terminal state.
+                    STORM_LOG_TRACE("Found terminal state, updating probabilities along path.");
+                    updateProbabilityBoundsAlongSampledPath(stack, explorationInformation, bounds);
+                } else {
+                    // If not terminal state was found, the search aborted, possibly because of an EC-detection. In this
+                    // case, we cannot update the probabilities.
+                    STORM_LOG_TRACE("Did not find terminal state.");
                }
-            return result;
+                
+                STORM_LOG_DEBUG("Discovered states: " << explorationInformation.getNumberOfDiscoveredStates() << " (" << stats.numberOfExploredStates << "explored, " << explorationInformation.getNumberOfUnexploredStates() << " unexplored).");
+                STORM_LOG_DEBUG("Value of initial state is in [" << bounds.getLowerBoundForState(initialStateIndex, explorationInformation) << ", " << bounds.getUpperBoundForState(initialStateIndex, explorationInformation) << "].");
+                ValueType difference = bounds.getDifferenceOfStateBounds(initialStateIndex, explorationInformation);
+                STORM_LOG_DEBUG("Difference after iteration " << stats.iterations << " is " << difference << ".");
+                convergenceCriterionMet = difference < 1e-6;
+                
+                ++stats.iterations;
            }
            
-        template<typename ValueType>
-        std::pair<ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::computeValuesOfState(StateType currentStateId, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType> const& lowerBounds, std::vector<ValueType> const& upperBounds, std::vector<ValueType> const& lowerBoundsPerState, std::vector<ValueType> const& upperBoundsPerState, StateType const& unexploredMarker) {
-            StateType sourceRowGroup = stateToRowGroupMapping[currentStateId];
-            std::pair<ValueType, ValueType> result = std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>());
-            for (uint32_t choice = rowGroupIndices[sourceRowGroup]; choice < rowGroupIndices[sourceRowGroup + 1]; ++choice) {
-                std::pair<ValueType, ValueType> choiceValues = computeValuesOfChoice(choice, transitionMatrix, stateToRowGroupMapping, lowerBoundsPerState, upperBoundsPerState, unexploredMarker);
-                result.first = std::max(choiceValues.first, result.first);
-                result.second = std::max(choiceValues.second, result.second);
+            if (storm::settings::generalSettings().isShowStatisticsSet()) {
+                std::cout << std::endl << "Learning summary -------------------------" << std::endl;
+                std::cout << "Discovered states: " << explorationInformation.getNumberOfDiscoveredStates() << " (" << stats.numberOfExploredStates << "explored, " << explorationInformation.getNumberOfUnexploredStates() << " unexplored, " << stats.numberOfTargetStates << " target states)" << std::endl;
+                std::cout << "Sampling iterations: " << stats.iterations << std::endl;
+                std::cout << "Maximal path length: " << stats.maxPathLength << std::endl;
            }
-            return result;
+            
+            return std::make_tuple(initialStateIndex, bounds.getLowerBoundForState(initialStateIndex, explorationInformation), bounds.getUpperBoundForState(initialStateIndex, explorationInformation));
        }
        
        template<typename ValueType>
-        uint32_t SparseMdpLearningModelChecker<ValueType>::sampleFromMaxActions(StateType currentStateId, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType> const& upperBoundsPerState, std::unordered_map<StateType, ChoiceSetPointer> const& stateToLeavingChoicesOfEndComponent, StateType const& unexploredMarker) {
+        bool SparseMdpLearningModelChecker<ValueType>::samplePathFromState(StateGeneration& stateGeneration, ExplorationInformation& explorationInformation, StateActionStack& stack, BoundValues& bounds, Statistics& stats) const {
            
-            StateType rowGroup = stateToRowGroupMapping[currentStateId];
+            // Start the search from the initial state.
+            stack.push_back(std::make_pair(explorationInformation.getFirstInitialState(), 0));
            
-            // First, determine all maximizing actions.
-            std::vector<uint32_t> allMaxActions;
+            bool foundTerminalState = false;
+            while (!foundTerminalState) {
+                StateType const& currentStateId = stack.back().first;
+                STORM_LOG_TRACE("State on top of stack is: " << currentStateId << ".");
                
-            for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) {
-                if (stateToRowGroupMapping[state] != unexploredMarker) {
-                    std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [x, " << upperBoundsPerState[stateToRowGroupMapping[state]] << "]" << std::endl;
+                // If the state is not yet explored, we need to retrieve its behaviors.
+                auto unexploredIt = explorationInformation.unexploredStates.find(currentStateId);
+                if (unexploredIt != explorationInformation.unexploredStates.end()) {
+                    STORM_LOG_TRACE("State was not yet explored.");
+                    
+                    // Explore the previously unexplored state.
+                    storm::generator::CompressedState const& compressedState = unexploredIt->second;
+                    foundTerminalState = exploreState(stateGeneration, currentStateId, compressedState, explorationInformation, bounds, stats);
+                    explorationInformation.unexploredStates.erase(unexploredIt);
                } else {
-                    std::cout << "state " << state << " is unexplored" << std::endl;
+                    // If the state was already explored, we check whether it is a terminal state or not.
+                    if (explorationInformation.isTerminal(currentStateId)) {
+                        STORM_LOG_TRACE("Found already explored terminal state: " << currentStateId << ".");
+                        foundTerminalState = true;
                    }
                }
                
-            // Determine the maximal value of any action.
-            ValueType max = 0;
-            auto choicesInEcIt = stateToLeavingChoicesOfEndComponent.find(currentStateId);
-            if (choicesInEcIt != stateToLeavingChoicesOfEndComponent.end()) {
-                STORM_LOG_TRACE("Sampling from actions leaving the previously detected EC.");
-                for (auto const& row : *choicesInEcIt->second) {
-                    ValueType current = 0;
-                    for (auto const& element : transitionMatrix[row]) {
-                        current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
-                    }
+                // If the state was not a terminal state, we continue the path search and sample the next state.
+                if (!foundTerminalState) {
+                    // At this point, we can be sure that the state was expanded and that we can sample according to the
+                    // probabilities in the matrix.
+                    uint32_t chosenAction = sampleFromMaxActions(currentStateId, explorationInformation, bounds);
+                    stack.back().second = chosenAction;
+                    STORM_LOG_TRACE("Sampled action " << chosenAction << " in state " << currentStateId << ".");
+                    
+                    StateType successor = sampleSuccessorFromAction(chosenAction, explorationInformation);
+                    STORM_LOG_TRACE("Sampled successor " << successor << " according to action " << chosenAction << " of state " << currentStateId << ".");
                    
-                    max = std::max(max, current);
+                    // Put the successor state and a dummy action on top of the stack.
+                    stack.emplace_back(successor, 0);
+                    stats.maxPathLength = std::max(stats.maxPathLength, stack.size());
+                    
+                    // If the current path length exceeds the threshold and the model is a nondeterministic one, we
+                    // perform an EC detection.
+                    if (stack.size() > stats.pathLengthUntilEndComponentDetection && !program.isDeterministicModel()) {
+                        detectEndComponents(stack, explorationInformation, bounds);
+                        
+                        // Abort the current search.
+                        STORM_LOG_TRACE("Aborting the search after EC detection.");
+                        stack.clear();
+                        break;
                    }
-            } else {
-                STORM_LOG_TRACE("Sampling from actions leaving the state.");
-                for (uint32_t row = rowGroupIndices[rowGroup]; row < rowGroupIndices[rowGroup + 1]; ++row) {
-                    ValueType current = 0;
-                    for (auto const& element : transitionMatrix[row]) {
-                        current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
                }
-                    max = std::max(max, current);
            }
+            
+            return foundTerminalState;
        }
        
-            STORM_LOG_TRACE("Looking for action with value " << max << ".");
+        template<typename ValueType>
+        bool SparseMdpLearningModelChecker<ValueType>::exploreState(StateGeneration& stateGeneration, StateType const& currentStateId, storm::generator::CompressedState const& currentState, ExplorationInformation& explorationInformation, BoundValues& bounds, Statistics& stats) const {
+            
+            bool isTerminalState = false;
+            bool isTargetState = false;
            
-            for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) {
-                if (stateToRowGroupMapping[state] != unexploredMarker) {
-                    std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [x, " << upperBoundsPerState[stateToRowGroupMapping[state]] << "]" << std::endl;
+            // Before generating the behavior of the state, we need to determine whether it's a target state that
+            // does not need to be expanded.
+            stateGeneration.generator.load(currentState);
+            if (stateGeneration.isTargetState()) {
+                ++stats.numberOfTargetStates;
+                isTargetState = true;
+                isTerminalState = true;
            } else {
-                    std::cout << "state " << state << " is unexplored" << std::endl;
+                STORM_LOG_TRACE("Exploring state.");
+                
+                // If it needs to be expanded, we use the generator to retrieve the behavior of the new state.
+                storm::generator::StateBehavior<ValueType, StateType> behavior = stateGeneration.expand();
+                STORM_LOG_TRACE("State has " << behavior.getNumberOfChoices() << " choices.");
+                
+                // Clumsily check whether we have found a state that forms a trivial BMEC.
+                if (behavior.getNumberOfChoices() == 0) {
+                    isTerminalState = true;
+                } else if (behavior.getNumberOfChoices() == 1) {
+                    auto const& onlyChoice = *behavior.begin();
+                    if (onlyChoice.size() == 1) {
+                        auto const& onlyEntry = *onlyChoice.begin();
+                        if (onlyEntry.first == currentStateId) {
+                            isTerminalState = true;
+                        }
                    }
                }
                
-            if (choicesInEcIt != stateToLeavingChoicesOfEndComponent.end()) {
-                for (auto const& row : *choicesInEcIt->second) {
-                    ValueType current = 0;
-                    for (auto const& element : transitionMatrix[row]) {
-                        current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
+                // If the state was neither a trivial (non-accepting) terminal state nor a target state, we
+                // need to store its behavior.
+                if (!isTerminalState) {
+                    // Next, we insert the behavior into our matrix structure.
+                    StateType startRow = explorationInformation.matrix.size();
+                    explorationInformation.addRowsToMatrix(behavior.getNumberOfChoices());
+                    
+                    // Terminate the row group.
+                    explorationInformation.rowGroupIndices.push_back(explorationInformation.matrix.size());
+                    
+                    ActionType currentAction = 0;
+                    for (auto const& choice : behavior) {
+                        for (auto const& entry : choice) {
+                            std::cout << "adding " << currentStateId << " -> " << entry.first << " with prob " << entry.second << std::endl;
+                            explorationInformation.matrix[startRow + currentAction].emplace_back(entry.first, entry.second);
                        }
-                    STORM_LOG_TRACE("Computed (upper) bound " << current << " for row " << row << ".");
                        
-                    // If the action is one of the maximizing ones, insert it into our list.
-                    if (comparator.isEqual(current, max)) {
-                        allMaxActions.push_back(row);
+                        bounds.initializeActionBoundsForNextAction(computeBoundsOfAction(startRow + currentAction, explorationInformation, bounds));
+
+                        STORM_LOG_TRACE("Initializing bounds of action " << (startRow + currentAction) << " to " << bounds.getLowerBoundForAction(startRow + currentAction) << " and " << bounds.getUpperBoundForAction(startRow + currentAction) << ".");
+                        
+                        ++currentAction;
+                    }
+                    
+                    bounds.initializeStateBoundsForNextState(computeBoundsOfState(currentStateId, explorationInformation, bounds));
+                    STORM_LOG_TRACE("Initializing bounds of state " << currentStateId << " to " << bounds.getLowerBoundForState(currentStateId) << " and " << bounds.getUpperBoundForState(currentStateId) << ".");
                }
            }
+            
+            if (isTerminalState) {
+                STORM_LOG_TRACE("State does not need to be explored, because it is " << (isTargetState ? "a target state" : "a rejecting terminal state") << ".");
+                explorationInformation.addTerminalState(currentStateId);
+                
+                if (isTargetState) {
+                    bounds.initializeStateBoundsForNextState(std::make_pair(storm::utility::one<ValueType>(), storm::utility::one<ValueType>()));
+                    bounds.initializeStateBoundsForNextAction(std::make_pair(storm::utility::one<ValueType>(), storm::utility::one<ValueType>()));
                } else {
-                for (uint32_t row = rowGroupIndices[rowGroup]; row < rowGroupIndices[rowGroup + 1]; ++row) {
-                    ValueType current = 0;
-                    for (auto const& element : transitionMatrix[row]) {
-                        if (stateToRowGroupMapping[element.getColumn()] != unexploredMarker) {
-                            std::cout << "upper bounds per state @ " << stateToRowGroupMapping[element.getColumn()] << " is " << upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]] << std::endl;
+                    bounds.initializeStateBoundsForNextState(std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>()));
+                    bounds.initializeStateBoundsForNextAction(std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>()));
+                }
+                
+                // Increase the size of the matrix, but leave the row empty.
+                explorationInformation.addRowsToMatrix(1);
+                
+                // Terminate the row group.
+                explorationInformation.newRowGroup();
            }
-                        std::cout << "+= " << element.getValue() << " * " << "state[" << element.getColumn() << "] = " << (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]) << std::endl;
-                        current += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
+            
+            // Finally, map the unexplored state to the row group.
+            explorationInformation.assignStateToNextRowGroup(currentStateId);
+            STORM_LOG_TRACE("Assigning row group " << explorationInformation.getRowGroup(currentStateId) << " to state " << currentStateId << ".");
+            
+            return isTerminalState;
        }
-                    STORM_LOG_TRACE("Computed (upper) bound " << current << " for row " << row << ".");
        
-                    // If the action is one of the maximizing ones, insert it into our list.
-                    if (comparator.isEqual(current, max)) {
-                        allMaxActions.push_back(row);
+        template<typename ValueType>
+        uint32_t SparseMdpLearningModelChecker<ValueType>::sampleMaxAction(StateType const& currentStateId, ExplorationInformation const& explorationInformation, BoundValues& bounds) const {
+            StateType rowGroup = explorationInformation.getRowGroup(currentStateId);
+            
+            // First, determine all maximizing actions.
+            std::vector<ActionType> allMaxActions;
+            
+            // Determine the values of all available actions.
+            std::vector<std::pair<ActionType, ValueType>> actionValues;
+            auto choicesInEcIt = explorationInformation.stateToLeavingChoicesOfEndComponent.find(currentStateId);
+            if (choicesInEcIt != explorationInformation.stateToLeavingChoicesOfEndComponent.end()) {
+                STORM_LOG_TRACE("Sampling from actions leaving the previously detected EC.");
+                for (auto const& row : *choicesInEcIt->second) {
+                    actionValues.push_back(std::make_pair(row, computeUpperBoundOfAction(row, explorationInformation, bounds)));
                }
+            } else {
+                STORM_LOG_TRACE("Sampling from actions leaving the state.");
+                
+                for (uint32_t row = explorationInformation.getStartRowOfGroup(rowGroup); row < explorationInformation.getStartRowOfGroup(rowGroup + 1); ++row) {
+                    actionValues.push_back(std::make_pair(row, computeUpperBoundOfAction(row, explorationInformation, bounds)));
                }
            }
            
-            STORM_LOG_ASSERT(!allMaxActions.empty(), "Must have at least one maximizing action.");
+            std::sort(actionValues.begin(), actionValues.end(), [] (std::pair<ActionType, ValueType> const& a, std::pair<ActionType, ValueType> const& b) { return b.second > a.second; } );
+            auto end = std::equal_range(actionValues.begin(), actionValues.end(), [this] (std::pair<ActionType, ValueType> const& a, std::pair<ActionType, ValueType> const& b) { return comparator.isEqual(a.second, b.second); } );
            
            // Now sample from all maximizing actions.
-            std::uniform_int_distribution<uint32_t> distribution(0, allMaxActions.size() - 1);
-            return allMaxActions[distribution(randomGenerator)];
+            std::uniform_int_distribution<uint32_t> distribution(0, std::distance(actionValues.begin(), end));
+            return actionValues[distribution(randomGenerator)].first;
        }
        
        template<typename ValueType>
-        typename SparseMdpLearningModelChecker<ValueType>::StateType SparseMdpLearningModelChecker<ValueType>::sampleSuccessorFromAction(StateType chosenAction, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping) {
-            uint32_t row = chosenAction;
-            
+        typename SparseMdpLearningModelChecker<ValueType>::StateType SparseMdpLearningModelChecker<ValueType>::sampleSuccessorFromAction(ActionType const& chosenAction, ExplorationInformation const& explorationInformation) const {
            // TODO: precompute this?
-            std::vector<ValueType> probabilities(transitionMatrix[row].size());
-            std::transform(transitionMatrix[row].begin(), transitionMatrix[row].end(), probabilities.begin(), [] (storm::storage::MatrixEntry<StateType, ValueType> const& entry) { return entry.getValue(); } );
+            std::vector<ValueType> probabilities(explorationInformation.getRowOfMatrix(chosenAction).size());
+            std::transform(explorationInformation.getRowOfMatrix(chosenAction).begin(), explorationInformation.getRowOfMatrix(chosenAction).end(), probabilities.begin(), [] (storm::storage::MatrixEntry<StateType, ValueType> const& entry) { return entry.getValue(); } );
            
            // Now sample according to the probabilities.
            std::discrete_distribution<StateType> distribution(probabilities.begin(), probabilities.end());
            StateType offset = distribution(randomGenerator);
-            return transitionMatrix[row][offset].getColumn();
+            return explorationInformation.getRowOfMatrix(chosenAction)[offset].getColumn();
        }
        
        template<typename ValueType>
-        storm::expressions::Expression SparseMdpLearningModelChecker<ValueType>::getTargetStateExpression(storm::logic::Formula const& subformula) {
-            storm::expressions::Expression result;
-            if (subformula.isAtomicExpressionFormula()) {
-                result = subformula.asAtomicExpressionFormula().getExpression();
-            } else {
-                result = program.getLabelExpression(subformula.asAtomicLabelFormula().getLabel());
-            }
-            return result;
-        }
-        
-        template<typename ValueType>
-        void SparseMdpLearningModelChecker<ValueType>::detectEndComponents(std::vector<std::pair<StateType, uint32_t>> const& stateActionStack, boost::container::flat_set<StateType>& terminalStates, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>>& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType>& lowerBoundsPerAction, std::vector<ValueType>& upperBoundsPerAction, std::vector<ValueType>& lowerBoundsPerState, std::vector<ValueType>& upperBoundsPerState, std::unordered_map<StateType, ChoiceSetPointer>& stateToLeavingChoicesOfEndComponent, StateType const& unexploredMarker) const {
-            
+        void SparseMdpLearningModelChecker<ValueType>::detectEndComponents(StateActionStack const& stack, ExplorationInformation& explorationInformation, BoundValues& bounds) const {
            STORM_LOG_TRACE("Starting EC detection.");
            
            // Outline:
@ -281,13 +337,14 @@ namespace storm {
            
            // Determine the set of states that was expanded.
            std::vector<StateType> relevantStates;
-            for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) {
-                if (stateToRowGroupMapping[state] != unexploredMarker) {
+            for (StateType state = 0; state < explorationInformation.stateStorage.numberOfStates; ++state) {
+                if (!explorationInformation.isUnexplored(state)) {
                    relevantStates.push_back(state);
                }
            }
+
            // Sort according to the actual row groups so we can insert the elements in order later.
-            std::sort(relevantStates.begin(), relevantStates.end(), [&stateToRowGroupMapping] (StateType const& a, StateType const& b) { return stateToRowGroupMapping[a] < stateToRowGroupMapping[b]; });
+            std::sort(relevantStates.begin(), relevantStates.end(), [&explorationInformation] (StateType const& a, StateType const& b) { return explorationInformation.getRowGroup(a) < explorationInformation.getRowGroup(b); });
            StateType unexploredState = relevantStates.size();
            
            // Create a mapping for faster look-up during the translation of flexible matrix to the real sparse matrix.
@ -301,10 +358,10 @@ namespace storm {
            StateType currentRow = 0;
            for (auto const& state : relevantStates) {
                builder.newRowGroup(currentRow);
-                StateType rowGroup = stateToRowGroupMapping[state];
-                for (auto row = rowGroupIndices[rowGroup]; row < rowGroupIndices[rowGroup + 1]; ++row) {
+                StateType rowGroup = explorationInformation.getRowGroup(state);
+                for (auto row = explorationInformation.getStartRowOfGroup(rowGroup); row < explorationInformation.getStartRowOfGroup(rowGroup + 1); ++row) {
                    ValueType unexpandedProbability = storm::utility::zero<ValueType>();
-                    for (auto const& entry : transitionMatrix[row]) {
+                    for (auto const& entry : explorationInformation.getRowOfMatrix(row)) {
                        auto it = relevantStateToNewRowGroupMapping.find(entry.getColumn());
                        if (it != relevantStateToNewRowGroupMapping.end()) {
                            // If the entry is a relevant state, we copy it over (and compensate for the offset change).
@ -340,41 +397,41 @@ namespace storm {
                bool containsTargetState = false;
                
                // Now we record all choices leaving the EC.
-                ChoiceSetPointer leavingChoices = std::make_shared<ChoiceSet>();
+                ActionSetPointer leavingChoices = std::make_shared<ActionSet>();
                for (auto const& stateAndChoices : mec) {
                    // Compute the state of the original model that corresponds to the current state.
                    std::cout << "local state: " << stateAndChoices.first << std::endl;
                    StateType originalState = relevantStates[stateAndChoices.first];
                    std::cout << "original state: " << originalState << std::endl;
-                    uint32_t originalRowGroup = stateToRowGroupMapping[originalState];
+                    uint32_t originalRowGroup = explorationInformation.getRowGroup(originalState);
                    std::cout << "original row group: " << originalRowGroup << std::endl;
                    
-                    // TODO: This check for a target state is a bit hackish and only works for max probabilities.
-                    if (!containsTargetState && lowerBoundsPerState[originalRowGroup] == storm::utility::one<ValueType>()) {
+                    // TODO: This checks for a target state is a bit hackish and only works for max probabilities.
+                    if (!containsTargetState && comparator.isOne(bounds.getLowerBoundForRowGroup(originalRowGroup, explorationInformation))) {
                        containsTargetState = true;
                    }
                    
                    auto includedChoicesIt = stateAndChoices.second.begin();
                    auto includedChoicesIte = stateAndChoices.second.end();
                    
-                    for (auto choice = rowGroupIndices[originalRowGroup]; choice < rowGroupIndices[originalRowGroup + 1]; ++choice) {
+                    for (auto action = explorationInformation.getStartRowOfGroup(originalRowGroup); action < explorationInformation.getStartRowOfGroup(originalRowGroup + 1); ++action) {
                        if (includedChoicesIt != includedChoicesIte) {
                            STORM_LOG_TRACE("Next (local) choice contained in MEC is " << (*includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first]));
-                            STORM_LOG_TRACE("Current (local) choice iterated is " << (choice - rowGroupIndices[originalRowGroup]));
-                            if (choice - rowGroupIndices[originalRowGroup] != *includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first]) {
+                            STORM_LOG_TRACE("Current (local) choice iterated is " << (action - explorationInformation.getStartRowOfGroup(originalRowGroup)));
+                            if (action - explorationInformation.getStartRowOfGroup(originalRowGroup) != *includedChoicesIt - relevantStatesMatrix.getRowGroupIndices()[stateAndChoices.first]) {
                                STORM_LOG_TRACE("Choice leaves the EC.");
-                                leavingChoices->insert(choice);
+                                leavingChoices->insert(action);
                            } else {
                                STORM_LOG_TRACE("Choice stays in the EC.");
                                ++includedChoicesIt;
                            }
                        } else {
                            STORM_LOG_TRACE("Choice leaves the EC, because there is no more choice staying in the EC.");
-                            leavingChoices->insert(choice);
+                            leavingChoices->insert(action);
                        }
                    }
                    
-                    stateToLeavingChoicesOfEndComponent[originalState] = leavingChoices;
+                    explorationInformation.stateToLeavingChoicesOfEndComponent[originalState] = leavingChoices;
                }
                
                // If one of the states of the EC is a target state, all states in the EC have probability 1.
@ -384,9 +441,9 @@ namespace storm {
                        // Compute the state of the original model that corresponds to the current state.
                        StateType originalState = relevantStates[stateAndChoices.first];
                        
-                        STORM_LOG_TRACE("Setting lower bound of state in row group " << stateToRowGroupMapping[originalState] << " to 1.");
-                        lowerBoundsPerState[stateToRowGroupMapping[originalState]] = storm::utility::one<ValueType>();
-                        terminalStates.insert(originalState);
+                        STORM_LOG_TRACE("Setting lower bound of state in row group " << explorationInformation.getRowGroup(originalState) << " to 1.");
+                        bounds.setLowerBoundForState(originalState, explorationInformation, storm::utility::one<ValueType>());
+                        explorationInformation.addTerminalState(originalState);
                    }
                } else if (leavingChoices->empty()) {
                    STORM_LOG_TRACE("MEC's leaving choices are empty.");
@ -395,317 +452,138 @@ namespace storm {
                        // Compute the state of the original model that corresponds to the current state.
                        StateType originalState = relevantStates[stateAndChoices.first];
                        
-                        STORM_LOG_TRACE("Setting upper bound of state in row group " << stateToRowGroupMapping[originalState] << " to 0.");
-                        upperBoundsPerState[stateToRowGroupMapping[originalState]] = storm::utility::zero<ValueType>();
-                        terminalStates.insert(originalState);
+                        STORM_LOG_TRACE("Setting upper bound of state in row group " << explorationInformation.getRowGroup(originalState) << " to 0.");
+                        bounds.setUpperBoundForState(originalState, explorationInformation, storm::utility::zero<ValueType>());
+                        explorationInformation.addTerminalState(originalState);
                    }
                }
            }
        }
        
        template<typename ValueType>
-        bool SparseMdpLearningModelChecker<ValueType>::exploreState(storm::generator::PrismNextStateGenerator<ValueType, StateType>& generator, std::function<StateType (storm::generator::CompressedState const&)> const& stateToIdCallback, StateType const& currentStateId, storm::generator::CompressedState const& compressedState, StateType const& unexploredMarker, boost::container::flat_set<StateType>& terminalStates, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>>& matrix, std::vector<StateType>& rowGroupIndices, std::vector<StateType>& stateToRowGroupMapping, storm::expressions::Expression const& targetStateExpression, std::vector<ValueType>& lowerBoundsPerAction, std::vector<ValueType>& upperBoundsPerAction, std::vector<ValueType>& lowerBoundsPerState, std::vector<ValueType>& upperBoundsPerState, Statistics& stats) {
-            
-            bool isTerminalState = false;
-            bool isTargetState = false;
-            
-            // Map the unexplored state to a row group.
-            stateToRowGroupMapping[currentStateId] = rowGroupIndices.size() - 1;
-            STORM_LOG_TRACE("Assigning row group " << stateToRowGroupMapping[currentStateId] << " to state " << currentStateId << ".");
-            lowerBoundsPerState.push_back(storm::utility::zero<ValueType>());
-            upperBoundsPerState.push_back(storm::utility::one<ValueType>());
-            
-            // Before generating the behavior of the state, we need to determine whether it's a target state that
-            // does not need to be expanded.
-            generator.load(compressedState);
-            if (generator.satisfies(targetStateExpression)) {
-                ++stats.numberOfTargetStates;
-                isTargetState = true;
-                isTerminalState = true;
-            } else {
-                STORM_LOG_TRACE("Exploring state.");
-                
-                // If it needs to be expanded, we use the generator to retrieve the behavior of the new state.
-                storm::generator::StateBehavior<ValueType, StateType> behavior = generator.expand(stateToIdCallback);
-                STORM_LOG_TRACE("State has " << behavior.getNumberOfChoices() << " choices.");
-                
-                // Clumsily check whether we have found a state that forms a trivial BMEC.
-                if (behavior.getNumberOfChoices() == 0) {
-                    isTerminalState = true;
-                } else if (behavior.getNumberOfChoices() == 1) {
-                    auto const& onlyChoice = *behavior.begin();
-                    if (onlyChoice.size() == 1) {
-                        auto const& onlyEntry = *onlyChoice.begin();
-                        if (onlyEntry.first == currentStateId) {
-                            isTerminalState = true;
-                        }
+        ValueType SparseMdpLearningModelChecker<ValueType>::computeLowerBoundOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
+            ValueType result = storm::utility::zero<ValueType>();
+            for (auto const& element : explorationInformation.getRowOfMatrix(action)) {
+                result += element.getValue() * bounds.getLowerBoundForState(element.getColumn(), explorationInformation);
            }
+            return result;
        }
        
-                // If the state was neither a trivial (non-accepting) terminal state nor a target state, we
-                // need to store its behavior.
-                if (!isTerminalState) {
-                    // Next, we insert the behavior into our matrix structure.
-                    StateType startRow = matrix.size();
-                    matrix.resize(startRow + behavior.getNumberOfChoices());
-                    
-                    // Terminate the row group.
-                    rowGroupIndices.push_back(matrix.size());
-                    
-                    uint32_t currentAction = 0;
-                    for (auto const& choice : behavior) {
-                        for (auto const& entry : choice) {
-                            std::cout << "adding " << currentStateId << " -> " << entry.first << " with prob " << entry.second << std::endl;
-                            matrix[startRow + currentAction].emplace_back(entry.first, entry.second);
+        template<typename ValueType>
+        ValueType SparseMdpLearningModelChecker<ValueType>::computeUpperBoundOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
+            ValueType result = storm::utility::zero<ValueType>();
+            for (auto const& element : explorationInformation.getRowOfMatrix(action)) {
+                result += element.getValue() * bounds.getUpperBoundForState(element.getColumn(), explorationInformation);
            }
-                        
-                        lowerBoundsPerAction.push_back(storm::utility::zero<ValueType>());
-                        upperBoundsPerAction.push_back(storm::utility::one<ValueType>());
-                        
-                        std::pair<ValueType, ValueType> values = computeValuesOfChoice(startRow + currentAction, matrix, stateToRowGroupMapping, lowerBoundsPerState, upperBoundsPerState, unexploredMarker);
-                        lowerBoundsPerAction.back() = values.first;
-                        upperBoundsPerAction.back() = values.second;
-                        
-                        STORM_LOG_TRACE("Initializing bounds of action " << (startRow + currentAction) << " to " << lowerBoundsPerAction.back() << " and " << upperBoundsPerAction.back() << ".");
-                        
-                        ++currentAction;
+            return result;
        }
        
-                    std::pair<ValueType, ValueType> values = computeValuesOfState(currentStateId, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker);
-                    lowerBoundsPerState.back() = values.first;
-                    upperBoundsPerState.back() = values.second;
-                    
-                    STORM_LOG_TRACE("Initializing bounds of state " << currentStateId << " to " << lowerBoundsPerState.back() << " and " << upperBoundsPerState.back() << ".");
+        template<typename ValueType>
+        ValueType SparseMdpLearningModelChecker<ValueType>::computeLowerBoundOfState(StateType const& state, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
+            StateType group = explorationInformation.getRowGroup(state);
+            ValueType result = std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>());
+            for (ActionType action = explorationInformation.getStartRowOfGroup(group); action < explorationInformation.getStartRowOfGroup(group + 1); ++action) {
+                ValueType actionValue = computeLowerBoundOfAction(action, explorationInformation, bounds);
+                result = std::max(actionValue, result);
            }
+            return result;
        }
    
-            if (isTerminalState) {
-                STORM_LOG_TRACE("State does not need to be explored, because it is " << (isTargetState ? "a target state" : "a rejecting terminal state") << ".");
-                terminalStates.insert(currentStateId);
-                
-                if (isTargetState) {
-                    lowerBoundsPerState.back() = storm::utility::one<ValueType>();
-                    lowerBoundsPerAction.push_back(storm::utility::one<ValueType>());
-                    upperBoundsPerAction.push_back(storm::utility::one<ValueType>());
-                } else {
-                    upperBoundsPerState.back() = storm::utility::zero<ValueType>();
-                    lowerBoundsPerAction.push_back(storm::utility::zero<ValueType>());
-                    upperBoundsPerAction.push_back(storm::utility::zero<ValueType>());
+        template<typename ValueType>
+        ValueType SparseMdpLearningModelChecker<ValueType>::computeUpperBoundOfState(StateType const& state, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
+            StateType group = explorationInformation.getRowGroup(state);
+            ValueType result = std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>());
+            for (ActionType action = explorationInformation.getStartRowOfGroup(group); action < explorationInformation.getStartRowOfGroup(group + 1); ++action) {
+                ValueType actionValue = computeUpperBoundOfAction(action, explorationInformation, bounds);
+                result = std::max(actionValue, result);
            }
-                
-                // Increase the size of the matrix, but leave the row empty.
-                matrix.resize(matrix.size() + 1);
-                
-                // Terminate the row group.
-                rowGroupIndices.push_back(matrix.size());
+            return result;
        }
        
-            return isTerminalState;
+        template<typename ValueType>
+        std::pair<ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::computeBoundsOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
+            // TODO: take into account self-loops?
+            std::pair<ValueType, ValueType> result = std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>());
+            for (auto const& element : explorationInformation.getRowOfMatrix(action)) {
+                result.first += element.getValue() * bounds.getLowerBoundForState(element.getColumn(), explorationInformation);
+                result.second += element.getValue() * bounds.getUpperBoundForState(element.getColumn(), explorationInformation);
+            }
+            return result;
        }

        template<typename ValueType>
-        bool SparseMdpLearningModelChecker<ValueType>::samplePathFromState(storm::generator::PrismNextStateGenerator<ValueType, StateType>& generator, std::function<StateType (storm::generator::CompressedState const&)> const& stateToIdCallback, StateType initialStateIndex, std::vector<std::pair<StateType, uint32_t>>& stateActionStack, std::unordered_map<StateType, storm::generator::CompressedState>& unexploredStates, StateType const& unexploredMarker, boost::container::flat_set<StateType>& terminalStates, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>>& matrix, std::vector<StateType>& rowGroupIndices, std::vector<StateType>& stateToRowGroupMapping, std::unordered_map<StateType, ChoiceSetPointer>& stateToLeavingChoicesOfEndComponent, storm::expressions::Expression const& targetStateExpression, std::vector<ValueType>& lowerBoundsPerAction, std::vector<ValueType>& upperBoundsPerAction, std::vector<ValueType>& lowerBoundsPerState, std::vector<ValueType>& upperBoundsPerState, Statistics& stats) {
-
-            // Start the search from the initial state.
-            stateActionStack.push_back(std::make_pair(initialStateIndex, 0));
-            
-            bool foundTerminalState = false;
-            while (!foundTerminalState) {
-                StateType const& currentStateId = stateActionStack.back().first;
-                STORM_LOG_TRACE("State on top of stack is: " << currentStateId << ".");
-                
-                // If the state is not yet explored, we need to retrieve its behaviors.
-                auto unexploredIt = unexploredStates.find(currentStateId);
-                if (unexploredIt != unexploredStates.end()) {
-                    STORM_LOG_TRACE("State was not yet explored.");
-                    
-                    // Explore the previously unexplored state.
-                    foundTerminalState = exploreState(generator, stateToIdCallback, currentStateId, unexploredIt->second);
-                    unexploredStates.erase(unexploredIt);
-                } else {
-                    // If the state was already explored, we check whether it is a terminal state or not.
-                    if (terminalStates.find(currentStateId) != terminalStates.end()) {
-                        STORM_LOG_TRACE("Found already explored terminal state: " << currentStateId << ".");
-                        foundTerminalState = true;
+        std::pair<ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::computeBoundsOfState(StateType const& currentStateId, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const {
+            StateType group = explorationInformation.getRowGroup(currentStateId);
+            std::pair<ValueType, ValueType> result = std::make_pair(storm::utility::zero<ValueType>(), storm::utility::zero<ValueType>());
+            for (ActionType action = explorationInformation.getStartRowOfGroup(group); action < explorationInformation.getStartRowOfGroup(group + 1); ++action) {
+                std::pair<ValueType, ValueType> actionValues = computeBoundsOfAction(action, explorationInformation, bounds);
+                result.first = std::max(actionValues.first, result.first);
+                result.second = std::max(actionValues.second, result.second);
            }
+            return result;
        }
        
-                // If the state was not a terminal state, we continue the path search and sample the next state.
-                if (!foundTerminalState) {
-                    std::cout << "(2) stack is:" << std::endl;
-                    for (auto const& el : stateActionStack) {
+        template<typename ValueType>
+        void SparseMdpLearningModelChecker<ValueType>::updateProbabilityBoundsAlongSampledPath(StateActionStack& stack, ExplorationInformation const& explorationInformation, BoundValues& bounds) const {
+            std::cout << "stack is:" << std::endl;
+            for (auto const& el : stack) {
                std::cout << el.first << "-[" << el.second << "]-> ";
            }
            std::cout << std::endl;
            
-                    for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) {
-                        if (stateToRowGroupMapping[state] != unexploredMarker) {
-                            std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [" << lowerBoundsPerState[stateToRowGroupMapping[state]] << ", " << upperBoundsPerState[stateToRowGroupMapping[state]] << "], actions: ";
-                            for (auto choice = rowGroupIndices[stateToRowGroupMapping[state]]; choice < rowGroupIndices[stateToRowGroupMapping[state] + 1]; ++choice) {
-                                std::cout << choice << " = [" << lowerBoundsPerAction[choice] << ", " << upperBoundsPerAction[choice] << "], ";
-                            }
-                            std::cout << std::endl;
-                        } else {
-                            std::cout << "state " << state << " is unexplored" << std::endl;
+            stack.pop_back();
+            while (!stack.empty()) {
+                updateProbabilityOfAction(stack.back().first, stack.back().second, explorationInformation, bounds);
+                stack.pop_back();
            }
        }

-                    // At this point, we can be sure that the state was expanded and that we can sample according to the
-                    // probabilities in the matrix.
-                    uint32_t chosenAction = sampleFromMaxActions(currentStateId, matrix, rowGroupIndices, stateToRowGroupMapping, upperBoundsPerState, stateToLeavingChoicesOfEndComponent, unexploredMarker);
-                    stateActionStack.back().second = chosenAction;
-                    STORM_LOG_TRACE("Sampled action " << chosenAction << " in state " << currentStateId << ".");
-                    
-                    StateType successor = sampleSuccessorFromAction(chosenAction, matrix, rowGroupIndices, stateToRowGroupMapping);
-                    STORM_LOG_TRACE("Sampled successor " << successor << " according to action " << chosenAction << " of state " << currentStateId << ".");
-                    
-                    // Put the successor state and a dummy action on top of the stack.
-                    stateActionStack.emplace_back(successor, 0);
-                    stats.maxPathLength = std::max(stats.maxPathLength, stateActionStack.size());
-                    
-                    // If the current path length exceeds the threshold and the model is a nondeterministic one, we
-                    // perform an EC detection.
-                    if (stateActionStack.size() > stats.pathLengthUntilEndComponentDetection && !program.isDeterministicModel()) {
-                        detectEndComponents(stateActionStack, terminalStates, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, stateToLeavingChoicesOfEndComponent, unexploredMarker);
-                        
-                        // Abort the current search.
-                        STORM_LOG_TRACE("Aborting the search after EC detection.");
-                        stateActionStack.clear();
-                        break;
-                    }
-                }
-            }
-            
-            return foundTerminalState;
-        }
-        
        template<typename ValueType>
-        std::tuple<typename SparseMdpLearningModelChecker<ValueType>::StateType, ValueType, ValueType> SparseMdpLearningModelChecker<ValueType>::performLearningProcedure(storm::expressions::Expression const& targetStateExpression, storm::storage::sparse::StateStorage<StateType>& stateStorage, storm::generator::PrismNextStateGenerator<ValueType, StateType>& generator, std::function<StateType (storm::generator::CompressedState const&)> const& stateToIdCallback, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>>& matrix, std::vector<StateType>& rowGroupIndices, std::vector<StateType>& stateToRowGroupMapping, std::unordered_map<StateType, storm::generator::CompressedState>& unexploredStates, StateType const& unexploredMarker) {
+        void SparseMdpLearningModelChecker<ValueType>::updateProbabilityOfAction(StateType const& state, ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues& bounds) const {
+            // Compute the new lower/upper values of the action.
+            std::pair<ValueType, ValueType> newBoundsForAction = computeBoundsOfAction(action, explorationInformation, bounds);
            
-            // Generate the initial state so we know where to start the simulation.
-            stateStorage.initialStateIndices = generator.getInitialStates(stateToIdCallback);
-            STORM_LOG_THROW(stateStorage.initialStateIndices.size() == 1, storm::exceptions::NotSupportedException, "Currently only models with one initial state are supported by the learning engine.");
-            StateType initialStateIndex = stateStorage.initialStateIndices.front();
+            // And set them as the current value.
+            bounds.setBoundForAction(action, newBoundsForAction);
            
-            // A set storing all states in which to terminate the search.
-            boost::container::flat_set<StateType> terminalStates;
+            // Check if we need to update the values for the states.
+            bounds.setNewLowerBoundOfStateIfGreaterThanOld(state, explorationInformation, newBoundsForAction.first);
            
-            // Vectors to store the lower/upper bounds for each action (in each state).
-            std::vector<ValueType> lowerBoundsPerAction;
-            std::vector<ValueType> upperBoundsPerAction;
-            std::vector<ValueType> lowerBoundsPerState;
-            std::vector<ValueType> upperBoundsPerState;
+            StateType rowGroup = explorationInformation.getRowGroup(state);
+            if (newBoundsForAction < bounds.getUpperBoundOfRowGroup(rowGroup)) {
                
-            // Since we might run into end-components, we track a mapping from states in ECs to all leaving choices of
-            // that EC.
-            std::unordered_map<StateType, ChoiceSetPointer> stateToLeavingChoicesOfEndComponent;
+            }
            
-            // Now perform the actual sampling.
-            std::vector<std::pair<StateType, uint32_t>> stateActionStack;
+            ValueType upperBound = computeUpperBoundOverAllOtherActions(state, action, explorationInformation, bounds);
            
-            Statistics stats;
-            bool convergenceCriterionMet = false;
-            while (!convergenceCriterionMet) {
-                bool result = samplePathFromState(generator, stateToIdCallback, initialStateIndex, stateActionStack, unexploredStates, unexploredMarker, terminalStates, matrix, rowGroupIndices, stateToRowGroupMapping, stateToLeavingChoicesOfEndComponent, targetStateExpression, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, stats);
+            uint32_t sourceRowGroup = stateToRowGroupMapping[sourceStateId];
+            if (newUpperValue < upperBoundsPerState[sourceRowGroup]) {
+                if (rowGroupIndices[sourceRowGroup + 1] - rowGroupIndices[sourceRowGroup] > 1) {
+                    ValueType max = storm::utility::zero<ValueType>();

-                // If a terminal state was found, we update the probabilities along the path contained in the stack.
-                if (result) {
-                    // Update the bounds along the path to the terminal state.
-                    STORM_LOG_TRACE("Found terminal state, updating probabilities along path.");
-                    updateProbabilitiesUsingStack(stateActionStack, matrix, rowGroupIndices, stateToRowGroupMapping, lowerBoundsPerAction, upperBoundsPerAction, lowerBoundsPerState, upperBoundsPerState, unexploredMarker);
-                } else {
-                    // If not terminal state was found, the search aborted, possibly because of an EC-detection. In this
-                    // case, we cannot update the probabilities.
-                    STORM_LOG_TRACE("Did not find terminal state.");
+                    for (uint32_t currentAction = rowGroupIndices[sourceRowGroup]; currentAction < rowGroupIndices[sourceRowGroup + 1]; ++currentAction) {
+                        std::cout << "cur: " << currentAction << std::endl;
+                        if (currentAction == action) {
+                            continue;
                        }
                        
-                // Sanity check of results.
-                for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) {
-                    if (stateToRowGroupMapping[state] != unexploredMarker) {
-                        STORM_LOG_ASSERT(lowerBoundsPerState[stateToRowGroupMapping[state]] <= upperBoundsPerState[stateToRowGroupMapping[state]], "The bounds for state " << state << " are not in a sane relation: " << lowerBoundsPerState[stateToRowGroupMapping[state]] << " > " << upperBoundsPerState[stateToRowGroupMapping[state]] << ".");
-                    }
+                        ValueType currentValue = storm::utility::zero<ValueType>();
+                        for (auto const& element : transitionMatrix[currentAction]) {
+                            currentValue += element.getValue() * (stateToRowGroupMapping[element.getColumn()] == unexploredMarker ? storm::utility::one<ValueType>() : upperBoundsPerState[stateToRowGroupMapping[element.getColumn()]]);
                        }
-
-//                 TODO: remove debug output when superfluous
-//                for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) {
-//                    if (stateToRowGroupMapping[state] != unexploredMarker) {
-//                        std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [" << lowerBoundsPerState[stateToRowGroupMapping[state]] << ", " << upperBoundsPerState[stateToRowGroupMapping[state]] << "], actions: ";
-//                        for (auto choice = rowGroupIndices[stateToRowGroupMapping[state]]; choice < rowGroupIndices[stateToRowGroupMapping[state] + 1]; ++choice) {
-//                            std::cout << choice << " = [" << lowerBoundsPerAction[choice] << ", " << upperBoundsPerAction[choice] << "], ";
-//                        }
-//                        std::cout << std::endl;
-//                    } else {
-//                        std::cout << "state " << state << " is unexplored" << std::endl;
-//                    }
-//                }
-                
-                STORM_LOG_DEBUG("Discovered states: " << stateStorage.numberOfStates << " (" << unexploredStates.size() << " unexplored).");
-                STORM_LOG_DEBUG("Value of initial state is in [" << lowerBoundsPerState[initialStateIndex] << ", " << upperBoundsPerState[initialStateIndex] << "].");
-                ValueType difference = upperBoundsPerState[initialStateIndex] - lowerBoundsPerState[initialStateIndex];
-                STORM_LOG_DEBUG("Difference after iteration " << stats.iterations << " is " << difference << ".");
-                convergenceCriterionMet = difference < 1e-6;
-                
-                ++stats.iterations;
+                        max = std::max(max, currentValue);
+                        std::cout << "max is " << max << std::endl;
                    }
                    
-            if (storm::settings::generalSettings().isShowStatisticsSet()) {
-                std::cout << std::endl << "Learning summary -------------------------" << std::endl;
-                std::cout << "Discovered states: " << stateStorage.numberOfStates << " (" << unexploredStates.size() << " unexplored, " << stats.numberOfTargetStates << " target states)" << std::endl;
-                std::cout << "Sampling iterations: " << stats.iterations << std::endl;
-                std::cout << "Maximal path length: " << stats.maxPathLength << std::endl;
+                    newUpperValue = std::max(newUpperValue, max);
                }
                
-            return std::make_tuple(initialStateIndex, lowerBoundsPerState[initialStateIndex], upperBoundsPerState[initialStateIndex]);
+                if (newUpperValue < upperBoundsPerState[sourceRowGroup]) {
+                    STORM_LOG_TRACE("Got new upper bound for state " << sourceStateId << ": " << newUpperValue << " (was " << upperBoundsPerState[sourceRowGroup] << ").");
+                    std::cout << "writing at index " << sourceRowGroup << std::endl;
+                    upperBoundsPerState[sourceRowGroup] = newUpperValue;
                }
-        
-        template<typename ValueType>
-        std::unique_ptr<CheckResult> SparseMdpLearningModelChecker<ValueType>::computeReachabilityProbabilities(CheckTask<storm::logic::EventuallyFormula> const& checkTask) {
-            storm::logic::EventuallyFormula const& eventuallyFormula = checkTask.getFormula();
-            storm::logic::Formula const& subformula = eventuallyFormula.getSubformula();
-            STORM_LOG_THROW(subformula.isAtomicExpressionFormula() || subformula.isAtomicLabelFormula(), storm::exceptions::NotSupportedException, "Learning engine can only deal with formulas of the form 'F \"label\"' or 'F expression'.");
-            
-            // Derive the expression for the target states, so we know when to abort the learning run.
-            storm::expressions::Expression targetStateExpression = getTargetStateExpression(subformula);
-            
-            // A container for the encountered states.
-            storm::storage::sparse::StateStorage<StateType> stateStorage(variableInformation.getTotalBitOffset(true));
-            
-            // A generator used to explore the model.
-            storm::generator::PrismNextStateGenerator<ValueType, StateType> generator(program, variableInformation, false);
-            
-            // A container that stores the transitions found so far.
-            std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> matrix;
-            
-            // A vector storing where the row group of each state starts.
-            std::vector<StateType> rowGroupIndices;
-            rowGroupIndices.push_back(0);
-            
-            // A vector storing the mapping from state ids to row groups.
-            std::vector<StateType> stateToRowGroupMapping;
-            StateType unexploredMarker = std::numeric_limits<StateType>::max();
-            
-            // A mapping of unexplored IDs to their actual compressed states.
-            std::unordered_map<StateType, storm::generator::CompressedState> unexploredStates;
-            
-            // Create a callback for the next-state generator to enable it to request the index of states.
-            std::function<StateType (storm::generator::CompressedState const&)> stateToIdCallback = [&stateStorage, &stateToRowGroupMapping, &unexploredStates, &unexploredMarker] (storm::generator::CompressedState const& state) -> StateType {
-                StateType newIndex = stateStorage.numberOfStates;
-                
-                // Check, if the state was already registered.
-                std::pair<uint32_t, std::size_t> actualIndexBucketPair = stateStorage.stateToId.findOrAddAndGetBucket(state, newIndex);
-                
-                if (actualIndexBucketPair.first == newIndex) {
-                    ++stateStorage.numberOfStates;
-                    stateToRowGroupMapping.push_back(unexploredMarker);
-                    unexploredStates[newIndex] = state;
            }
-                
-                return actualIndexBucketPair.first;
-            };
-            
-            // Compute and return result.
-            std::tuple<StateType, ValueType, ValueType> boundsForInitialState = performLearningProcedure(targetStateExpression, stateStorage, generator, stateToIdCallback, matrix, rowGroupIndices, stateToRowGroupMapping, unexploredStates, unexploredMarker);
-            return std::make_unique<ExplicitQuantitativeCheckResult<ValueType>>(std::get<0>(boundsForInitialState), std::get<1>(boundsForInitialState));
        }
        
        template class SparseMdpLearningModelChecker<double>;
--- a/src/modelchecker/reachability/SparseMdpLearningModelChecker.h
+++ b/src/modelchecker/reachability/SparseMdpLearningModelChecker.h
@ -6,7 +6,9 @@
 #include "src/modelchecker/AbstractModelChecker.h"

 #include "src/storage/prism/Program.h"
+#include "src/storage/sparse/StateStorage.h"

+#include "src/generator/PrismNextStateGenerator.h"
 #include "src/generator/CompressedState.h"
 #include "src/generator/VariableInformation.h"

@ -32,8 +34,11 @@ namespace storm {
        class SparseMdpLearningModelChecker : public AbstractModelChecker {
        public:
            typedef uint32_t StateType;
-            typedef boost::container::flat_set<StateType> ChoiceSet;
-            typedef std::shared_ptr<ChoiceSet> ChoiceSetPointer;
+            typedef uint32_t ActionType;
+            typedef boost::container::flat_set<StateType> StateSet;
+            typedef boost::container::flat_set<ActionType> ActionSet;
+            typedef std::shared_ptr<ActionSet> ActionSetPointer;
+            typedef std::vector<std::pair<StateType, ActionType>> StateActionStack;
            
            SparseMdpLearningModelChecker(storm::prism::Program const& program, boost::optional<std::map<storm::expressions::Variable, storm::expressions::Expression>> const& constantDefinitions);
            
@ -42,37 +47,270 @@ namespace storm {
            virtual std::unique_ptr<CheckResult> computeReachabilityProbabilities(CheckTask<storm::logic::EventuallyFormula> const& checkTask) override;
            
        private:
+            // A struct that keeps track of certain statistics during the computation.
            struct Statistics {
-                Statistics() : iterations(0), maxPathLength(0), numberOfTargetStates(0), pathLengthUntilEndComponentDetection(27) {
+                Statistics() : iterations(0), maxPathLength(0), numberOfTargetStates(0), numberOfExploredStates(0), pathLengthUntilEndComponentDetection(27) {
                    // Intentionally left empty.
                }
+                
                std::size_t iterations;
                std::size_t maxPathLength;
                std::size_t numberOfTargetStates;
+                std::size_t numberOfExploredStates;
                std::size_t pathLengthUntilEndComponentDetection;
            };
            
-            bool exploreState(storm::generator::PrismNextStateGenerator<ValueType, StateType>& generator, std::function<StateType (storm::generator::CompressedState const&)> const& stateToIdCallback, StateType const& currentStateId, storm::generator::CompressedState const& compressedState, StateType const& unexploredMarker, boost::container::flat_set<StateType>& terminalStates, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>>& matrix, std::vector<StateType>& rowGroupIndices, std::vector<StateType>& stateToRowGroupMapping, storm::expressions::Expression const& targetStateExpression, std::vector<ValueType>& lowerBoundsPerAction, std::vector<ValueType>& upperBoundsPerAction, std::vector<ValueType>& lowerBoundsPerState, std::vector<ValueType>& upperBoundsPerState, Statistics& stats);
+            // A struct containing the data required for state exploration.
+            struct StateGeneration {
+                StateGeneration(storm::generator::PrismNextStateGenerator<ValueType, StateType>&& generator, storm::expressions::Expression const& targetStateExpression) : generator(std::move(generator)), targetStateExpression(targetStateExpression) {
+                    // Intentionally left empty.
+                }
+                
+                std::vector<StateType> getInitialStates() {
+                    return generator.getInitialStates(stateToIdCallback);
+                }
+                
+                storm::generator::StateBehavior<ValueType, StateType> expand() {
+                    return generator.expand(stateToIdCallback);
+                }
+                
+                bool isTargetState() const {
+                    return generator.satisfies(targetStateExpression);
+                }
+                
+                storm::generator::PrismNextStateGenerator<ValueType, StateType> generator;
+                std::function<StateType (storm::generator::CompressedState const&)> stateToIdCallback;
+                storm::expressions::Expression targetStateExpression;
+            };
+            
+            // A structure containing the data assembled during exploration.
+            struct ExplorationInformation {
+                ExplorationInformation(uint_fast64_t bitsPerBucket, ActionType const& unexploredMarker = std::numeric_limits<ActionType>::max()) : stateStorage(bitsPerBucket), unexploredMarker(unexploredMarker) {
+                    // Intentionally left empty.
+                }
+                
+                storm::storage::sparse::StateStorage<StateType> stateStorage;
+                
+                std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> matrix;
+                std::vector<StateType> rowGroupIndices;
+                
+                std::vector<StateType> stateToRowGroupMapping;
+                StateType unexploredMarker;
+                std::unordered_map<StateType, storm::generator::CompressedState> unexploredStates;
+                
+                storm::OptimizationDirection optimizationDirection;
+                StateSet terminalStates;
+                
+                std::unordered_map<StateType, ActionSetPointer> stateToLeavingChoicesOfEndComponent;
+                
+                void setInitialStates(std::vector<StateType> const& initialStates) {
+                    stateStorage.initialStateIndices = initialStates;
+                }
+                
+                StateType getFirstInitialState() const {
+                    return stateStorage.initialStateIndices.front();
+                }
+                
+                std::size_t getNumberOfInitialStates() const {
+                    return stateStorage.initialStateIndices.size();
+                }
+                
+                void addUnexploredState(storm::generator::CompressedState const& compressedState) {
+                    stateToRowGroupMapping.push_back(unexploredMarker);
+                    unexploredStates[stateStorage.numberOfStates] = compressedState;
+                    ++stateStorage.numberOfStates;
+                }
+                
+                void assignStateToRowGroup(StateType const& state, ActionType const& rowGroup) {
+                    stateToRowGroupMapping[state] = rowGroup;
+                }
+                
+                void assignStateToNextRowGroup(StateType const& state) {
+                    stateToRowGroupMapping[state] = rowGroupIndices.size() - 1;
+                }
+                
+                void newRowGroup(ActionType const& action) {
+                    rowGroupIndices.push_back(action);
+                }
+                
+                void newRowGroup() {
+                    newRowGroup(matrix.size());
+                }
+                
+                std::size_t getNumberOfUnexploredStates() const {
+                    return unexploredStates.size();
+                }
+                
+                std::size_t getNumberOfDiscoveredStates() const {
+                    return stateStorage.numberOfStates;
+                }
+                
+                StateType const& getRowGroup(StateType const& state) const {
+                    return stateToRowGroupMapping[state];
+                }
+                
+                StateType const& getUnexploredMarker() const {
+                    return unexploredMarker;
+                }
+                
+                bool isUnexplored(StateType const& state) const {
+                    return unexploredStates.find(state) == unexploredStates.end();
+                }
+                
+                bool isTerminal(StateType const& state) const {
+                    return terminalStates.find(state) != terminalStates.end();
+                }
+                
+                ActionType const& getStartRowOfGroup(StateType const& group) const {
+                    return rowGroupIndices[group];
+                }
+                
+                void addTerminalState(StateType const& state) {
+                    terminalStates.insert(state);
+                }
+                
+                std::vector<storm::storage::MatrixEntry<StateType, ValueType>>& getRowOfMatrix(ActionType const& row) {
+                    return matrix[row];
+                }
+                
+                std::vector<storm::storage::MatrixEntry<StateType, ValueType>> const& getRowOfMatrix(ActionType const& row) const {
+                    return matrix[row];
+                }
+                
+                void addRowsToMatrix(std::size_t const& count) {
+                    matrix.resize(matrix.size() + count);
+                }
+            };
+            
+            // A struct containg the lower and upper bounds per state and action.
+            struct BoundValues {
+                std::vector<ValueType> lowerBoundsPerState;
+                std::vector<ValueType> upperBoundsPerState;
+                std::vector<ValueType> lowerBoundsPerAction;
+                std::vector<ValueType> upperBoundsPerAction;
+                
+                std::pair<ValueType, ValueType> getBoundsForState(StateType const& state, ExplorationInformation const& explorationInformation) const {
+                    ActionType index = explorationInformation.getRowGroup(state);
+                    if (index == explorationInformation.getUnexploredMarker()) {
+                        return std::make_pair(storm::utility::zero<ValueType>(), storm::utility::one<ValueType>());
+                    } else {
+                        return std::make_pair(lowerBoundsPerState[index], upperBoundsPerState[index]);
+                    }
+                }
+                
+                ValueType getLowerBoundForState(StateType const& state, ExplorationInformation const& explorationInformation) const {
+                    ActionType index = explorationInformation.getRowGroup(state);
+                    if (index == explorationInformation.getUnexploredMarker()) {
+                        return storm::utility::zero<ValueType>();
+                    } else {
+                        return getLowerBoundForRowGroup(index, explorationInformation);
+                    }
+                }
+                
+                ValueType getLowerBoundForRowGroup(StateType const& rowGroup, ExplorationInformation const& explorationInformation) const {
+                    return lowerBoundsPerState[rowGroup];
+                }
+                
+                ValueType getUpperBoundForState(StateType const& state, ExplorationInformation const& explorationInformation) const {
+                    ActionType index = explorationInformation.getRowGroup(state);
+                    if (index == explorationInformation.getUnexploredMarker()) {
+                        return storm::utility::one<ValueType>();
+                    } else {
+                        return getUpperBoundForRowGroup(index, explorationInformation);
+                    }
+                }
+                
+                ValueType getUpperBoundForRowGroup(StateType const& rowGroup, ExplorationInformation const& explorationInformation) const {
+                    return upperBoundsPerState[rowGroup];
+                }
+                
+                std::pair<ValueType, ValueType> getBoundsForAction(ActionType const& action) const {
+                    return std::make_pair(lowerBoundsPerAction[action], upperBoundsPerAction[action]);
+                }
+                
+                ValueType const& getLowerBoundForAction(ActionType const& action) const {
+                    return lowerBoundsPerAction[action];
+                }
+
+                ValueType const& getUpperBoundForAction(ActionType const& action) const {
+                    return upperBoundsPerAction[action];
+                }
+                
+                ValueType getDifferenceOfStateBounds(StateType const& state, ExplorationInformation const& explorationInformation) {
+                    std::pair<ValueType, ValueType> bounds = getBoundsForState(state, explorationInformation);
+                    return bounds.second - bounds.first;
+                }
+                
+                void initializeStateBoundsForNextState(std::pair<ValueType, ValueType> const& vals = std::pair<ValueType, ValueType>(storm::utility::zero<ValueType>(), storm::utility::one<ValueType>())) {
+                    lowerBoundsPerState.push_back(vals.first);
+                    upperBoundsPerState.push_back(vals.second);
+                }
+                
+                void initializeActionBoundsForNextAction(std::pair<ValueType, ValueType> const& vals = std::pair<ValueType, ValueType>(storm::utility::zero<ValueType>(), storm::utility::one<ValueType>())) {
+                    lowerBoundsPerAction.push_back(vals.first);
+                    upperBoundsPerAction.push_back(vals.second);
+                }
+                
+                void setLowerBoundForState(StateType const& state, ExplorationInformation const& explorationInformation, ValueType const& value) {
+                    lowerBoundsPerState[explorationInformation.getRowGroup(state)] = value;
+                }
+                
+                void setUpperBoundForState(StateType const& state, ExplorationInformation const& explorationInformation, ValueType const& value) {
+                    upperBoundsPerState[explorationInformation.getRowGroup(state)] = value;
+                }
+                
+                void setBoundsForAction(ActionType const& action, std::pair<ValueType, ValueType> const& values) {
+                    lowerBoundsPerAction[action] = values.first;
+                    upperBoundsPerAction[action] = values.second;
+                }
+
+                void setBoundsForState(StateType const& state, ExplorationInformation const& explorationInformation, std::pair<ValueType, ValueType> const& values) {
+                    StateType const& rowGroup = explorationInformation.getRowGroup(state);
+                    lowerBoundsPerState[rowGroup] = values.first;
+                    upperBoundsPerState[rowGroup] = values.second;
+                }
+                
+                bool setNewLowerBoundOfStateIfGreaterThanOld(StateType const& state, ExplorationInformation const& explorationInformation, ValueType const& newLowerValue) {
+                    StateType const& rowGroup = explorationInformation.getRowGroup(state);
+                    if (lowerBoundsPerState[rowGroup] < newLowerValue) {
+                        lowerBoundsPerState[rowGroup] = newLowerValue;
+                    }
+                }
+
+                bool setNewUpperBoundOfStateIfLessThanOld(StateType const& state, ExplorationInformation const& explorationInformation, ValueType const& newUpperValue) {
+                    StateType const& rowGroup = explorationInformation.getRowGroup(state);
+                    if (newUpperValue < upperBoundsPerState[rowGroup]) {
+                        upperBoundsPerState[rowGroup] = newUpperValue;
+                    }
+                }
+            };
+            
+            storm::expressions::Expression getTargetStateExpression(storm::logic::Formula const& subformula) const;
            
-            bool samplePathFromState(storm::generator::PrismNextStateGenerator<ValueType, StateType>& generator, std::function<StateType (storm::generator::CompressedState const&)> const& stateToIdCallback, StateType initialStateIndex, std::vector<std::pair<StateType, uint32_t>>& stateActionStack, std::unordered_map<StateType, storm::generator::CompressedState>& unexploredStates, StateType const& unexploredMarker, boost::container::flat_set<StateType>& terminalStates, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>>& matrix, std::vector<StateType>& rowGroupIndices, std::vector<StateType>& stateToRowGroupMapping, std::unordered_map<StateType, ChoiceSetPointer>& stateToLeavingChoicesOfEndComponent, storm::expressions::Expression const& targetStateExpression, std::vector<ValueType>& lowerBoundsPerAction, std::vector<ValueType>& upperBoundsPerAction, std::vector<ValueType>& lowerBoundsPerState, std::vector<ValueType>& upperBoundsPerState, Statistics& stats);
+            std::function<StateType (storm::generator::CompressedState const&)> createStateToIdCallback(ExplorationInformation& explorationInformation) const;
            
-            std::tuple<StateType, ValueType, ValueType> performLearningProcedure(storm::expressions::Expression const& targetStateExpression, storm::storage::sparse::StateStorage<StateType>& stateStorage, storm::generator::PrismNextStateGenerator<ValueType, StateType>& generator, std::function<StateType (storm::generator::CompressedState const&)> const& stateToIdCallback, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>>& matrix, std::vector<StateType>& rowGroupIndices, std::vector<StateType>& stateToRowGroupMapping, std::unordered_map<StateType, storm::generator::CompressedState>& unexploredStates, StateType const& unexploredMarker);
+            std::tuple<StateType, ValueType, ValueType> performLearningProcedure(StateGeneration& stateGeneration, ExplorationInformation& explorationInformation) const;

-            void updateProbabilities(StateType const& sourceStateId, uint32_t action, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType>& lowerBounds, std::vector<ValueType>& upperBounds, std::vector<ValueType>& lowerBoundsPerState, std::vector<ValueType>& upperBoundsPerState, StateType const& unexploredMarker) const;
+            bool samplePathFromState(StateGeneration& stateGeneration, ExplorationInformation& explorationInformation, StateActionStack& stack, BoundValues& bounds, Statistics& stats) const;
            
-            void updateProbabilitiesUsingStack(std::vector<std::pair<StateType, uint32_t>>& stateActionStack, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType>& lowerBounds, std::vector<ValueType>& upperBounds, std::vector<ValueType>& lowerBoundsPerState, std::vector<ValueType>& upperBoundsPerState, StateType const& unexploredMarker) const;
+            bool exploreState(StateGeneration& stateGeneration, StateType const& currentStateId, storm::generator::CompressedState const& currentState, ExplorationInformation& explorationInformation, BoundValues& bounds, Statistics& stats) const;
            
-            uint32_t sampleFromMaxActions(StateType currentStateId, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType> const& upperBoundsPerState, std::unordered_map<StateType, ChoiceSetPointer> const& stateToLeavingChoicesOfEndComponent, StateType const& unexploredMarker);
+            uint32_t sampleMaxAction(StateType const& currentStateId, ExplorationInformation const& explorationInformation, BoundValues& bounds) const;

-            StateType sampleSuccessorFromAction(StateType chosenAction, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping);
+            StateType sampleSuccessorFromAction(ActionType const& chosenAction, ExplorationInformation const& explorationInformation) const;
            
-            void detectEndComponents(std::vector<std::pair<StateType, uint32_t>> const& stateActionStack, boost::container::flat_set<StateType>& terminalStates, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>>& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType>& lowerBoundsPerAction, std::vector<ValueType>& upperBoundsPerAction, std::vector<ValueType>& lowerBoundsPerState, std::vector<ValueType>& upperBoundsPerState, std::unordered_map<StateType, ChoiceSetPointer>& stateToLeavingChoicesOfEndComponent, StateType const& unexploredMarker) const;
+            void detectEndComponents(StateActionStack const& stack, ExplorationInformation& explorationInformation, BoundValues& bounds) const;
            
-            std::pair<ValueType, ValueType> computeValuesOfChoice(uint32_t action, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType> const& lowerBoundsPerState, std::vector<ValueType> const& upperBoundsPerState, StateType const& unexploredMarker);
+            void updateProbabilityBoundsAlongSampledPath(StateActionStack& stack, ExplorationInformation const& explorationInformation, BoundValues& bounds) const;

-            std::pair<ValueType, ValueType> computeValuesOfState(StateType currentStateId, std::vector<std::vector<storm::storage::MatrixEntry<StateType, ValueType>>> const& transitionMatrix, std::vector<StateType> const& rowGroupIndices, std::vector<StateType> const& stateToRowGroupMapping, std::vector<ValueType> const& lowerBounds, std::vector<ValueType> const& upperBounds, std::vector<ValueType> const& lowerBoundsPerState, std::vector<ValueType> const& upperBoundsPerState, StateType const& unexploredMarker);
+            void updateProbabilityOfAction(StateType const& state, ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues& bounds) const;
            
-            storm::expressions::Expression getTargetStateExpression(storm::logic::Formula const& subformula);
+            std::pair<ValueType, ValueType> computeBoundsOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const;
+            std::pair<ValueType, ValueType> computeBoundsOfState(StateType const& currentStateId, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const;
+            ValueType computeLowerBoundOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const;
+            ValueType computeUpperBoundOfAction(ActionType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const;
+            ValueType computeLowerBoundOfState(StateType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const;
+            ValueType computeUpperBoundOfState(StateType const& action, ExplorationInformation const& explorationInformation, BoundValues const& bounds) const;
            
            // The program that defines the model to check.
            storm::prism::Program program;