From fc66e01ed5dcf19d0eaf06b0af1ba3fd26c2bd52 Mon Sep 17 00:00:00 2001
From: Tim Quatmann
Date: Fri, 31 Jul 2020 16:53:50 +0200
Subject: [PATCH] Nondeterministic infinite horizon: Split value getters into
 state value getters and action value getters. Made the VI code more general
 so that it can also be used for Markov automata.

---
 ...eNondeterministicInfiniteHorizonHelper.cpp | 438 ++++++++++++------
 ...rseNondeterministicInfiniteHorizonHelper.h |  16 +-
 2 files changed, 313 insertions(+), 141 deletions(-)

diff --git a/src/storm/modelchecker/helper/infinitehorizon/SparseNondeterministicInfiniteHorizonHelper.cpp b/src/storm/modelchecker/helper/infinitehorizon/SparseNondeterministicInfiniteHorizonHelper.cpp
index 40e8d2dcd..0b8ba57cc 100644
--- a/src/storm/modelchecker/helper/infinitehorizon/SparseNondeterministicInfiniteHorizonHelper.cpp
+++ b/src/storm/modelchecker/helper/infinitehorizon/SparseNondeterministicInfiniteHorizonHelper.cpp
@@ -31,36 +31,56 @@ namespace storm {
         template <typename ValueType>
         std::vector<ValueType> SparseNondeterministicInfiniteHorizonHelper<ValueType>::computeLongRunAverageProbabilities(Environment const& env, storm::storage::BitVector const& psiStates) {
-            return computeLongRunAverageValues(env, [&psiStates] (uint64_t stateIndex, uint64_t) { return psiStates.get(stateIndex) ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>();});
+            return computeLongRunAverageValues(env,
+                    [&psiStates] (uint64_t stateIndex) { return psiStates.get(stateIndex) ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>(); },
+                    [] (uint64_t) { return storm::utility::zero<ValueType>(); }
+                );
         }
 
         template <typename ValueType>
         std::vector<ValueType> SparseNondeterministicInfiniteHorizonHelper<ValueType>::computeLongRunAverageRewards(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel) {
-            if (_markovianStates) {
-                return computeLongRunAverageValues(env, [&] (uint64_t stateIndex, uint64_t globalChoiceIndex) {
-                    if (rewardModel.hasStateRewards() && _markovianStates->get(stateIndex)) {
-                        return rewardModel.getTotalStateActionReward(stateIndex, globalChoiceIndex, _transitionMatrix, (ValueType) (storm::utility::one<ValueType>() / (*_exitRates)[stateIndex]));
-                    } else {
-                        return rewardModel.getTotalStateActionReward(stateIndex, globalChoiceIndex, _transitionMatrix, storm::utility::zero<ValueType>());
-                    }
-                });
+            std::function<ValueType(uint64_t)> stateRewardsGetter;
+            if (rewardModel.hasStateRewards()) {
+                stateRewardsGetter = [&rewardModel] (uint64_t stateIndex) { return rewardModel.getStateReward(stateIndex); };
+            } else {
+                stateRewardsGetter = [] (uint64_t) { return storm::utility::zero<ValueType>(); };
+            }
+            std::function<ValueType(uint64_t)> actionRewardsGetter;
+            if (rewardModel.hasStateActionRewards() || rewardModel.hasTransitionRewards()) {
+                if (rewardModel.hasTransitionRewards()) {
+                    actionRewardsGetter = [&] (uint64_t globalChoiceIndex) { return rewardModel.getStateActionAndTransitionReward(globalChoiceIndex, this->_transitionMatrix); };
+                } else {
+                    actionRewardsGetter = [&] (uint64_t globalChoiceIndex) { return rewardModel.getStateActionReward(globalChoiceIndex); };
+                }
             } else {
-                return computeLongRunAverageValues(env, [&] (uint64_t stateIndex, uint64_t globalChoiceIndex) {
-                    return rewardModel.getTotalStateActionReward(stateIndex, globalChoiceIndex, _transitionMatrix);
-                });
+                actionRewardsGetter = [] (uint64_t) { return storm::utility::zero<ValueType>(); };
             }
+
+            return computeLongRunAverageValues(env, stateRewardsGetter, actionRewardsGetter);
         }
 
         template <typename ValueType>
-        std::vector<ValueType> SparseNondeterministicInfiniteHorizonHelper<ValueType>::computeLongRunAverageValues(Environment const& env, std::vector<ValueType> const& combinedStateActionRewards) {
-            return 
computeLongRunAverageValues(env, [&combinedStateActionRewards] (uint64_t, uint64_t globalChoiceIndex) { - return combinedStateActionRewards[globalChoiceIndex]; - }); + std::vector SparseNondeterministicInfiniteHorizonHelper::computeLongRunAverageValues(Environment const& env, std::vector const* stateValues, std::vector const* actionValues) { + std::function stateValuesGetter; + if (stateValues) { + stateValuesGetter = [&stateValues] (uint64_t stateIndex) { return (*stateValues)[stateIndex]; }; + } else { + stateValuesGetter = [] (uint64_t) { return storm::utility::zero(); }; + } + std::function actionValuesGetter; + if (actionValues) { + actionValuesGetter = [&actionValues] (uint64_t globalChoiceIndex) { return (*actionValues)[globalChoiceIndex]; }; + } else { + actionValuesGetter = [] (uint64_t) { return storm::utility::zero(); }; + } + + return computeLongRunAverageValues(env, stateValuesGetter, actionValuesGetter); + } template - std::vector SparseNondeterministicInfiniteHorizonHelper::computeLongRunAverageValues(Environment const& env, std::function const& combinedStateActionRewardsGetter) { + std::vector SparseNondeterministicInfiniteHorizonHelper::computeLongRunAverageValues(Environment const& env, std::function const& stateRewardsGetter, std::function const& actionRewardsGetter) { // Prepare an environment for the underlying solvers auto underlyingSolverEnvironment = env; @@ -86,7 +106,7 @@ namespace storm { std::vector mecLraValues; mecLraValues.reserve(mecDecomposition.size()); for (auto const& mec : mecDecomposition) { - mecLraValues.push_back(computeLraForMec(underlyingSolverEnvironment, combinedStateActionRewardsGetter, mec)); + mecLraValues.push_back(computeLraForMec(underlyingSolverEnvironment, stateRewardsGetter, actionRewardsGetter, mec)); } // Solve the resulting SSP where end components are collapsed into single auxiliary states @@ -129,36 +149,45 @@ namespace storm { } template - ValueType SparseNondeterministicInfiniteHorizonHelper::computeLraForMec(Environment const& env, std::function const& combinedStateActionRewardsGetter, storm::storage::MaximalEndComponent const& mec) { + bool SparseNondeterministicInfiniteHorizonHelper::isContinuousTime() const { + STORM_LOG_ASSERT((_markovianStates == nullptr) == (_exitRates == nullptr), "Inconsistent information given: Have Markovian states but no exit rates (or vice versa)." ); + return _markovianStates != nullptr; + } + + template + ValueType SparseNondeterministicInfiniteHorizonHelper::computeLraForMec(Environment const& env, std::function const& stateRewardsGetter, std::function const& actionRewardsGetter, storm::storage::MaximalEndComponent const& mec) { - // FIXME: MA // If the mec only consists of a single state, we compute the LRA value directly - if (++mec.begin() == mec.end()) { + if (mec.size() == 1) { uint64_t state = mec.begin()->first; auto choiceIt = mec.begin()->second.begin(); - ValueType result = combinedStateActionRewardsGetter(state, *choiceIt); - uint64_t bestChoice = *choiceIt; - for (++choiceIt; choiceIt != mec.begin()->second.end(); ++choiceIt) { - ValueType choiceValue = combinedStateActionRewardsGetter(state, *choiceIt); - if (this->minimize()) { - if (result > choiceValue) { - result = std::move(choiceValue); + if (isContinuousTime()) { + // Singleton MECs have to consist of a Markovian state because of the non-Zenoness assumption. Then, there is just one possible choice. + STORM_LOG_THROW(_markovianStates->get(state), storm::exceptions::InvalidOperationException, "Markov Automaton has Zeno behavior. 
Computation of Long Run Average values not supported."); + STORM_LOG_ASSERT(mec.begin()->second.size() == 1, "Markovian state has Nondeterministic behavior."); + if (isProduceSchedulerSet()) { + _producedOptimalChoices.get()[state] = 0; + } + return stateRewardsGetter(state) + (*_exitRates)[state] * actionRewardsGetter(*choiceIt); + } else { + // Find the choice with the highest/lowest reward + ValueType bestValue = actionRewardsGetter(*choiceIt); + uint64_t bestChoice = *choiceIt; + for (++choiceIt; choiceIt != mec.begin()->second.end(); ++choiceIt) { + ValueType currentValue = actionRewardsGetter(*choiceIt); + if ((this->minimize() && currentValue < bestValue) || (this->maximize() && currentValue > bestValue)) { + bestValue = std::move(currentValue); bestChoice = *choiceIt; } - } else { - if (result < choiceValue) { - result = std::move(choiceValue); - bestChoice = *choiceIt; - } } + if (isProduceSchedulerSet()) { + _producedOptimalChoices.get()[state] = bestChoice - _transitionMatrix.getRowGroupIndices()[state]; + } + return bestValue + stateRewardsGetter(state); } - if (isProduceSchedulerSet()) { - _producedOptimalChoices.get()[state] = bestChoice - _transitionMatrix.getRowGroupIndices()[state]; - } - return result; } - // Solve MEC with the method specified in the settings + // Solve nontrivial MEC with the method specified in the settings storm::solver::LraMethod method = env.solver().lra().getNondetLraMethod(); if ((storm::NumberTraits::IsExact || env.solver().isForceExact()) && env.solver().lra().isNondetLraMethodSetFromDefault() && method != storm::solver::LraMethod::LinearProgramming) { STORM_LOG_INFO("Selecting 'LP' as the solution technique for long-run properties to guarantee exact results. If you want to override this, please explicitly specify a different LRA method."); @@ -169,144 +198,281 @@ namespace storm { } STORM_LOG_ERROR_COND(!isProduceSchedulerSet() || method == storm::solver::LraMethod::ValueIteration, "Scheduler generation not supported for the chosen LRA method. Try value-iteration."); if (method == storm::solver::LraMethod::LinearProgramming) { - return computeLraForMecLp(env, combinedStateActionRewardsGetter, mec); + return computeLraForMecLp(env, stateRewardsGetter, actionRewardsGetter, mec); } else if (method == storm::solver::LraMethod::ValueIteration) { - return computeLraForMecVi(env, combinedStateActionRewardsGetter, mec); + return computeLraForMecVi(env, stateRewardsGetter, actionRewardsGetter, mec); } else { STORM_LOG_THROW(false, storm::exceptions::InvalidSettingsException, "Unsupported technique."); } } - + + /*! + * Abstract helper class that performs a single iteration of the value iteration method + */ template - ValueType SparseNondeterministicInfiniteHorizonHelper::computeLraForMecVi(Environment const& env, std::function const& combinedStateActionRewardsGetter, storm::storage::MaximalEndComponent const& mec) { - // Initialize data about the mec - storm::storage::BitVector mecStates(_transitionMatrix.getRowGroupCount(), false); - storm::storage::BitVector mecChoices(_transitionMatrix.getRowCount(), false); - for (auto const& stateChoicesPair : mec) { - mecStates.set(stateChoicesPair.first); - for (auto const& choice : stateChoicesPair.second) { - mecChoices.set(choice); + class LraViHelper { + public: + LraViHelper(storm::storage::MaximalEndComponent const& mec, storm::storage::SparseMatrix const& transitionMatrix) : _mec(mec), _transitionMatrix(transitionMatrix) { + // Intentionally left empty + } + virtual ~LraViHelper() = default; + + /*! 
+             * Performs a single iteration step.
+             * If a choices vector is given, the optimal choices will be inserted at the appropriate states.
+             * Note that these choices will be inserted w.r.t. the original model states/choices, i.e. the size of the vector should match the state-count of the input model.
+             * The current estimate of the LRA value can be obtained afterwards via checkConvergence().
+             */
+            virtual void iterate(Environment const& env, storm::solver::OptimizationDirection const& dir, std::vector<uint64_t>* choices = nullptr) = 0;
+
+            struct ConvergenceCheckResult {
+                bool isPrecisionAchieved;
+                ValueType currentValue;
+            };
+
+            /*!
+             * Checks whether the currently computed value achieves the desired precision
+             */
+            virtual ConvergenceCheckResult checkConvergence(bool relative, ValueType precision) = 0;
+
+            /*!
+             * Must be called between two calls of iterate.
+             */
+            virtual void prepareNextIteration() = 0;
+
+        protected:
+
+            /*!
+             * Checks whether the entry-wise changes between the two given value vectors all lie within a window of the given size.
+             * @param xPrevious the 'old' values
+             * @param xCurrent the 'new' values
+             * @param threshold the maximally allowed difference between the minimal and the maximal entry-wise change
+             * @param relative whether the relative difference should be considered
+             * @return The first component is true if the (relative) difference between the maximal and the minimal entry-wise change of the two value vectors is below or equal to the provided threshold.
+             * In this case, the second component is the average of the maximal and the minimal change.
+             * If the threshold is exceeded, the computation is aborted early and the second component is only an approximation of the average.
+             */
+            std::pair<bool, ValueType> checkMinMaxDiffBelowThreshold(std::vector<ValueType> const& xPrevious, std::vector<ValueType> const& xCurrent, ValueType const& threshold, bool relative) const {
+                STORM_LOG_ASSERT(xPrevious.size() == xCurrent.size(), "Unexpected Dimension Mismatch");
+                STORM_LOG_ASSERT(threshold > storm::utility::zero<ValueType>(), "Did not expect a non-positive threshold.");
+                auto x1It = xPrevious.begin();
+                auto x1Ite = xPrevious.end();
+                auto x2It = xCurrent.begin();
+                ValueType maxDiff = (*x2It - *x1It);
+                ValueType minDiff = maxDiff;
+                bool result = true;
+                // The difference between maxDiff and minDiff is zero at this point. Thus, it doesn't make sense to check the threshold now.
+                for (++x1It, ++x2It; x1It != x1Ite; ++x1It, ++x2It) {
+                    ValueType diff = (*x2It - *x1It);
+                    // Potentially update maxDiff or minDiff
+                    bool skipCheck = false;
+                    if (maxDiff < diff) {
+                        maxDiff = diff;
+                    } else if (minDiff > diff) {
+                        minDiff = diff;
+                    } else {
+                        skipCheck = true;
+                    }
+                    // Check convergence
+                    if (!skipCheck && (maxDiff - minDiff) > (relative ? 
(threshold * minDiff) : threshold)) { + result = false; + break; + } } + ValueType avgDiff = (maxDiff + minDiff) / (storm::utility::convertNumber(2.0)); + return {result, avgDiff}; } - boost::container::flat_map toSubModelStateMapping; - uint64_t currState = 0; - toSubModelStateMapping.reserve(mecStates.getNumberOfSetBits()); - for (auto const& mecState : mecStates) { - toSubModelStateMapping.insert(std::pair(mecState, currState)); - ++currState; - } - - // Get a transition matrix that only considers the states and choices within the MEC - storm::storage::SparseMatrixBuilder mecTransitionBuilder(mecChoices.getNumberOfSetBits(), mecStates.getNumberOfSetBits(), 0, true, true, mecStates.getNumberOfSetBits()); - std::vector choiceValues; - choiceValues.reserve(mecChoices.getNumberOfSetBits()); - uint64_t currRow = 0; - ValueType selfLoopProb = storm::utility::convertNumber(env.solver().lra().getAperiodicFactor()); - ValueType scalingFactor = storm::utility::one() - selfLoopProb; - for (auto const& mecState : mecStates) { - mecTransitionBuilder.newRowGroup(currRow); - uint64_t groupStart = _transitionMatrix.getRowGroupIndices()[mecState]; - uint64_t groupEnd = _transitionMatrix.getRowGroupIndices()[mecState + 1]; - for (uint64_t choice = mecChoices.getNextSetIndex(groupStart); choice < groupEnd; choice = mecChoices.getNextSetIndex(choice + 1)) { - bool insertedDiagElement = false; - for (auto const& entry : _transitionMatrix.getRow(choice)) { - uint64_t column = toSubModelStateMapping[entry.getColumn()]; - if (!insertedDiagElement && entry.getColumn() > mecState) { - mecTransitionBuilder.addNextValue(currRow, toSubModelStateMapping[mecState], selfLoopProb); - insertedDiagElement = true; + storm::storage::MaximalEndComponent const& _mec; + storm::storage::SparseMatrix const& _transitionMatrix; + }; + + /*! + * Abstract helper class that performs a single iteration of the value iteration method + * @see Ashok et al.: Value Iteration for Long-Run Average Reward in Markov Decision Processes (CAV'17), https://doi.org/10.1007/978-3-319-63387-9_10 + */ + template + class MdpLraViHelper : public LraViHelper { + public: + + MdpLraViHelper(storm::storage::MaximalEndComponent const& mec, storm::storage::SparseMatrix const& transitionMatrix, std::function const& stateRewardsGetter, std::function const& actionRewardsGetter, ValueType const& aperiodicFactor) : LraViHelper(mec, transitionMatrix), _x1(mec.size(), storm::utility::zero()), _x2(_x1), _x1IsCurrent(true) { + + // We add a selfloop to each state (which is necessary for convergence) + // Very roughly, this selfloop avoids that the values can flip around like this: [1, 0] -> [0, 1] -> [1, 0] -> ... + ValueType selfLoopProb = aperiodicFactor; + // Introducing the selfloop also requires the rewards to be scaled by the following factor. 
+ _scalingFactor = storm::utility::one() - selfLoopProb; + + uint64_t numMecStates = this->_mec.size(); + boost::container::flat_map toSubModelStateMapping; + toSubModelStateMapping.reserve(this->_mec.size()); + uint64_t currState = 0; + uint64_t numMecChoices = 0; + for (auto const& stateChoices : this->_mec) { + toSubModelStateMapping.insert(std::pair(stateChoices.first, currState)); + ++currState; + numMecChoices += stateChoices.second.size(); + } + assert(currState == numMecStates); + + // Get a transition matrix that only considers the states and choices within the MEC + storm::storage::SparseMatrixBuilder mecTransitionBuilder(numMecChoices, numMecStates, 0, true, true, numMecStates); + _choiceValues.reserve(numMecChoices); + uint64_t currRow = 0; + for (auto const& stateChoices : this->_mec) { + auto const& mecState = stateChoices.first; + auto const& mecChoices = stateChoices.second; + mecTransitionBuilder.newRowGroup(currRow); + for (auto const& choice : mecChoices) { + bool insertedDiagElement = false; + for (auto const& entry : this->_transitionMatrix.getRow(choice)) { + uint64_t column = toSubModelStateMapping[entry.getColumn()]; + if (!insertedDiagElement && entry.getColumn() > mecState) { + mecTransitionBuilder.addNextValue(currRow, toSubModelStateMapping[mecState], selfLoopProb); + insertedDiagElement = true; + } + if (!insertedDiagElement && entry.getColumn() == mecState) { + mecTransitionBuilder.addNextValue(currRow, column, selfLoopProb + _scalingFactor * entry.getValue()); + insertedDiagElement = true; + } else { + mecTransitionBuilder.addNextValue(currRow, column, _scalingFactor * entry.getValue()); + } } - if (!insertedDiagElement && entry.getColumn() == mecState) { - mecTransitionBuilder.addNextValue(currRow, column, selfLoopProb + scalingFactor * entry.getValue()); - insertedDiagElement = true; - } else { - mecTransitionBuilder.addNextValue(currRow, column, scalingFactor * entry.getValue()); + if (!insertedDiagElement) { + mecTransitionBuilder.addNextValue(currRow, toSubModelStateMapping[mecState], selfLoopProb); } + + // Compute the rewards obtained for this choice + _choiceValues.push_back(_scalingFactor * (stateRewardsGetter(mecState) + actionRewardsGetter(choice))); + + ++currRow; } - if (!insertedDiagElement) { - mecTransitionBuilder.addNextValue(currRow, toSubModelStateMapping[mecState], selfLoopProb); + } + + _mecTransitions = mecTransitionBuilder.build(); + + STORM_LOG_ASSERT(_mecTransitions.isProbabilistic(), "The MEC-Matrix is not probabilistic."); + STORM_LOG_ASSERT(_mecTransitions.getRowGroupCount() == _x1.size(), "Unexpected size mismatch for created matrix."); + STORM_LOG_ASSERT(_x1.size() == _x2.size(), "Unexpected size mismatch for created matrix."); + } + + virtual void iterate(Environment const& env, storm::solver::OptimizationDirection const& dir, std::vector* choices = nullptr) override { + // Initialize a multipler if it does not exist, yet + if (!_multiplier) { + _multiplier = storm::solver::MultiplierFactory().create(env, _mecTransitions); + } + + if (choices == nullptr) { + // Perform a simple matrix-vector multiplication + _multiplier->multiplyAndReduce(env, dir, xCurrent(), &_choiceValues, xPrevious()); + } else { + // Perform a simple matrix-vector multiplication but also keep track of the choices within the _mecTransitions + std::vector mecChoices(_mecTransitions.getRowGroupCount()); + _multiplier->multiplyAndReduce(env, dir, xCurrent(), &_choiceValues, xPrevious(), &mecChoices); + // Transform the local choices (within this mec) to global 
indices + uint64_t mecState = 0; + for (auto const& stateChoices : this->_mec) { + uint64_t mecChoice = mecChoices[mecState]; + STORM_LOG_ASSERT(mecChoice < stateChoices.second.size(), "The selected choice does not seem to exist."); + uint64_t globalChoiceIndex = *(stateChoices.second.begin() + mecChoice); + (*choices)[stateChoices.first] = globalChoiceIndex - this->_transitionMatrix.getRowGroupIndices()[stateChoices.first]; + ++mecState; } - - // Compute the rewards obtained for this choice - choiceValues.push_back(scalingFactor * combinedStateActionRewardsGetter(mecState, choice)); - - ++currRow; } + + // Swap current and previous x vectors + _x1IsCurrent = !_x1IsCurrent; + } - auto mecTransitions = mecTransitionBuilder.build(); - STORM_LOG_ASSERT(mecTransitions.isProbabilistic(), "The MEC-Matrix is not probabilistic."); - // start the iterations - ValueType precision = storm::utility::convertNumber(env.solver().lra().getPrecision()) / scalingFactor; - bool relative = env.solver().lra().getRelativeTerminationCriterion(); - std::vector x(mecTransitions.getRowGroupCount(), storm::utility::zero()); - std::vector xPrime = x; - auto dir = this->getOptimizationDirection(); + virtual typename LraViHelper::ConvergenceCheckResult checkConvergence(bool relative, ValueType precision) override { + typename LraViHelper::ConvergenceCheckResult res; + std::tie(res.isPrecisionAchieved, res.currentValue) = this->checkMinMaxDiffBelowThreshold(xPrevious(), xCurrent(), precision, relative); + res.currentValue /= _scalingFactor; // "Undo" the scaling of the rewards + return res; + } - auto multiplier = storm::solver::MultiplierFactory().create(env, mecTransitions); - ValueType maxDiff, minDiff; + virtual void prepareNextIteration() override { + // To avoid large (and numerically unstable) x-values, we substract a reference value. + ValueType referenceValue = xCurrent().front(); + storm::utility::vector::applyPointwise(xCurrent(), xCurrent(), [&referenceValue] (ValueType const& x_i) -> ValueType { return x_i - referenceValue; }); + } - uint64_t iter = 0; + private: + + std::vector& xCurrent() { + return _x1IsCurrent ? _x1 : _x2; + } + + std::vector& xPrevious() { + return _x1IsCurrent ? _x2 : _x1; + } + + storm::storage::SparseMatrix _mecTransitions; + std::vector _x1, _x2, _choiceValues; + bool _x1IsCurrent; + std::unique_ptr> _multiplier; + ValueType _scalingFactor; + }; + + template + ValueType SparseNondeterministicInfiniteHorizonHelper::computeLraForMecVi(Environment const& env, std::function const& stateRewardsGetter, std::function const& actionRewardsGetter, storm::storage::MaximalEndComponent const& mec) { + + // Collect some parameters of the computation. 
+ ValueType aperiodicFactor = storm::utility::convertNumber(env.solver().lra().getAperiodicFactor()); + ValueType precision = storm::utility::convertNumber(env.solver().lra().getPrecision()) / aperiodicFactor; + bool relative = env.solver().lra().getRelativeTerminationCriterion(); boost::optional maxIter; if (env.solver().lra().isMaximalIterationCountSet()) { maxIter = env.solver().lra().getMaximalIterationCount(); } + auto dir = this->getOptimizationDirection(); + + // Create an object for the iterations + std::shared_ptr> iterationHelper; + if (isContinuousTime()) { + // TODO + } else { + iterationHelper = std::make_shared>(mec, _transitionMatrix, stateRewardsGetter, actionRewardsGetter, aperiodicFactor); + } + + // start the iterations + ValueType result = storm::utility::zero(); + uint64_t iter = 0; while (!maxIter.is_initialized() || iter < maxIter.get()) { ++iter; - // Compute the obtained values for the next step - multiplier->multiplyAndReduce(env, dir, x, &choiceValues, x); - - // update xPrime and check for convergence - // to avoid large (and numerically unstable) x-values, we substract a reference value. - auto xIt = x.begin(); - auto xPrimeIt = xPrime.begin(); - ValueType refVal = *xIt; - maxDiff = *xIt - *xPrimeIt; - minDiff = maxDiff; - *xIt -= refVal; - *xPrimeIt = *xIt; - for (++xIt, ++xPrimeIt; xIt != x.end(); ++xIt, ++xPrimeIt) { - ValueType diff = *xIt - *xPrimeIt; - maxDiff = std::max(maxDiff, diff); - minDiff = std::min(minDiff, diff); - *xIt -= refVal; - *xPrimeIt = *xIt; - } - - if ((maxDiff - minDiff) <= (relative ? (precision * minDiff) : precision)) { + iterationHelper->iterate(env, dir); + // Check if we are done + auto convergenceCheckResult = iterationHelper->checkConvergence(relative, precision); + result = convergenceCheckResult.currentValue; + if (convergenceCheckResult.isPrecisionAchieved) { break; } if (storm::utility::resources::isTerminate()) { break; } + + iterationHelper->prepareNextIteration(); + } if (maxIter.is_initialized() && iter == maxIter.get()) { STORM_LOG_WARN("LRA computation did not converge within " << iter << " iterations."); + } else if (storm::utility::resources::isTerminate()) { + STORM_LOG_WARN("LRA computation aborted after " << iter << " iterations."); } else { STORM_LOG_TRACE("LRA computation converged after " << iter << " iterations."); } if (isProduceSchedulerSet()) { - std::vector localMecChoices(mecTransitions.getRowGroupCount(), 0); - multiplier->multiplyAndReduce(env, dir, x, &choiceValues, x, &localMecChoices); - auto localMecChoiceIt = localMecChoices.begin(); - for (auto const& mecState : mecStates) { - // Get the choice index of the selected mec choice with respect to the global transition matrix. - uint_fast64_t globalChoice = mecChoices.getNextSetIndex(_transitionMatrix.getRowGroupIndices()[mecState]); - for (uint_fast64_t i = 0; i < *localMecChoiceIt; ++i) { - globalChoice = mecChoices.getNextSetIndex(globalChoice + 1); - } - STORM_LOG_ASSERT(globalChoice < _transitionMatrix.getRowGroupIndices()[mecState + 1], "Invalid global choice for mec state."); - _producedOptimalChoices.get()[mecState] = globalChoice - _transitionMatrix.getRowGroupIndices()[mecState]; - ++localMecChoiceIt; - } + // We will be doing one more iteration step and track scheduler choices this time. 
+                iterationHelper->prepareNextIteration();
+                iterationHelper->iterate(env, dir, &_producedOptimalChoices.get());
             }
-            return (maxDiff + minDiff) / (storm::utility::convertNumber<ValueType>(2.0) * scalingFactor);
-
+            return result;
         }
 
         template <typename ValueType>
-        ValueType SparseNondeterministicInfiniteHorizonHelper<ValueType>::computeLraForMecLp(Environment const& env, std::function<ValueType(uint64_t, uint64_t)> const& combinedStateActionRewardsGetter, storm::storage::MaximalEndComponent const& mec) {
+        ValueType SparseNondeterministicInfiniteHorizonHelper<ValueType>::computeLraForMecLp(Environment const& env, std::function<ValueType(uint64_t)> const& stateRewardsGetter, std::function<ValueType(uint64_t)> const& actionRewardsGetter, storm::storage::MaximalEndComponent const& mec) {
             std::shared_ptr<storm::solver::LpSolver<ValueType>> solver = storm::utility::solver::getLpSolver<ValueType>("LRA for MEC");
             solver->setOptimizationDirection(invert(this->getOptimizationDirection()));
@@ -330,7 +496,7 @@ namespace storm {
                 for (auto element : _transitionMatrix.getRow(choice)) {
                     constraint = constraint + stateToVariableMap.at(element.getColumn()) * solver->getConstant(element.getValue());
                 }
-                constraint = solver->getConstant(combinedStateActionRewardsGetter(state, choice)) + constraint;
+                constraint = solver->getConstant(stateRewardsGetter(state) + actionRewardsGetter(choice)) + constraint;
 
                 if (this->minimize()) {
                     constraint = stateToVariableMap.at(state) <= constraint;
diff --git a/src/storm/modelchecker/helper/infinitehorizon/SparseNondeterministicInfiniteHorizonHelper.h b/src/storm/modelchecker/helper/infinitehorizon/SparseNondeterministicInfiniteHorizonHelper.h
index 7ca921aee..b2d30fc4d 100644
--- a/src/storm/modelchecker/helper/infinitehorizon/SparseNondeterministicInfiniteHorizonHelper.h
+++ b/src/storm/modelchecker/helper/infinitehorizon/SparseNondeterministicInfiniteHorizonHelper.h
@@ -44,13 +44,13 @@ namespace storm {
             * Computes the long run average value given the provided state and action values
             * @return a value for each state
             */
-            std::vector<ValueType> computeLongRunAverageValues(Environment const& env, std::vector<ValueType> const& combinedStateActionRewards);
+            std::vector<ValueType> computeLongRunAverageValues(Environment const& env, std::vector<ValueType> const* stateValues = nullptr, std::vector<ValueType> const* actionValues = nullptr);
 
            /*!
             * Computes the long run average value given the provided state and action value getters
             * @return a value for each state
             */
-            std::vector<ValueType> computeLongRunAverageValues(Environment const& env, std::function<ValueType(uint64_t, uint64_t)> const& combinedStateActionRewardsGetter);
+            std::vector<ValueType> computeLongRunAverageValues(Environment const& env, std::function<ValueType(uint64_t)> const& stateValuesGetter, std::function<ValueType(uint64_t)> const& actionValuesGetter);
 
            /*!
             * Sets whether an optimal scheduler shall be constructed during the computation
@@ -81,21 +81,27 @@ namespace storm {
             storm::storage::Scheduler<ValueType> extractScheduler() const;
 
         protected:
+
+            /*!
+             * @return true iff this is a computation on a continuous time model (i.e. MA)
+             */
+            bool isContinuousTime() const;
+
            /*!
             * @pre if scheduler production is enabled, the _producedOptimalChoices vector should be initialized and sufficiently large
             * @return the (unique) optimal LRA value for the given mec.
             * @post _producedOptimalChoices contains choices for the states of the given MEC which yield the returned LRA value.
             */
-            ValueType computeLraForMec(Environment const& env, std::function<ValueType(uint64_t, uint64_t)> const& combinedStateActionRewardsGetter, storm::storage::MaximalEndComponent const& mec);
+            ValueType computeLraForMec(Environment const& env, std::function<ValueType(uint64_t)> const& stateValuesGetter, std::function<ValueType(uint64_t)> const& actionValuesGetter, storm::storage::MaximalEndComponent const& mec);
 
            /*!
             * As computeLraForMec but uses value iteration as a solution method (independent of what is set in env)
             */
-            ValueType computeLraForMecVi(Environment const& env, std::function<ValueType(uint64_t, uint64_t)> const& combinedStateActionRewardsGetter, storm::storage::MaximalEndComponent const& mec);
+            ValueType computeLraForMecVi(Environment const& env, std::function<ValueType(uint64_t)> const& stateValuesGetter, std::function<ValueType(uint64_t)> const& actionValuesGetter, storm::storage::MaximalEndComponent const& mec);
 
            /*!
             * As computeLraForMec but uses linear programming as a solution method (independent of what is set in env)
             */
-            ValueType computeLraForMecLp(Environment const& env, std::function<ValueType(uint64_t, uint64_t)> const& combinedStateActionRewardsGetter, storm::storage::MaximalEndComponent const& mec);
+            ValueType computeLraForMecLp(Environment const& env, std::function<ValueType(uint64_t)> const& stateValuesGetter, std::function<ValueType(uint64_t)> const& actionValuesGetter, storm::storage::MaximalEndComponent const& mec);
 
            /*!
             * @return Lra values for each state
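
Usage note: with the split introduced above, callers supply one function mapping a state index to a value and one mapping a global choice index to a value. The following sketch illustrates this for ValueType = double; it assumes an already constructed SparseNondeterministicInfiniteHorizonHelper<double> named helper, a StandardRewardModel named rewardModel, and an Environment named env, which are placeholders for whatever the calling code has at hand.

    // Illustrative sketch of calling the new getter-based interface.
    // `helper`, `rewardModel` and `env` are assumed to exist in the calling code.
    std::function<double(uint64_t)> stateValuesGetter = [&rewardModel] (uint64_t stateIndex) {
        // Reward attached to the state itself, or zero if the model has no state rewards.
        return rewardModel.hasStateRewards() ? rewardModel.getStateReward(stateIndex) : 0.0;
    };
    std::function<double(uint64_t)> actionValuesGetter = [&rewardModel] (uint64_t globalChoiceIndex) {
        // Reward attached to the given (global) choice, or zero if the model has no state-action rewards.
        return rewardModel.hasStateActionRewards() ? rewardModel.getStateActionReward(globalChoiceIndex) : 0.0;
    };
    std::vector<double> lraValues = helper.computeLongRunAverageValues(env, stateValuesGetter, actionValuesGetter);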
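
The self-loop ("aperiodicity") transformation applied when the MEC matrix is built can be summarized independently of Storm's SparseMatrixBuilder: every choice receives a self-loop with probability equal to the aperiodicity factor, and the remaining probability mass as well as the choice reward are scaled by one minus that factor. The stationary behaviour is unchanged, so the long-run average value is merely scaled by the same factor, which checkConvergence divides out again. The sketch below shows the transformation for a single choice over a dense probability vector; struct and function names are illustrative only.

    #include <cstddef>
    #include <vector>

    // Sketch of the aperiodicity transformation on one choice of a MEC state.
    struct Choice {
        std::vector<double> probabilities; // probabilities[j]: probability of moving to state j
        double reward;                     // reward collected when taking this choice
    };

    Choice makeAperiodic(Choice const& original, std::size_t ownState, double selfLoopProb) {
        double scalingFactor = 1.0 - selfLoopProb;
        Choice result = original;
        // Scale all original transition probabilities by (1 - selfLoopProb) ...
        for (double& p : result.probabilities) {
            p *= scalingFactor;
        }
        // ... add the self-loop ...
        result.probabilities[ownState] += selfLoopProb;
        // ... and scale the reward by the same factor so the LRA value is only rescaled.
        result.reward *= scalingFactor;
        return result;
    }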
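
The stopping criterion implemented by checkMinMaxDiffBelowThreshold is the usual span-based test for relative value iteration: the iteration is considered converged once the entry-wise changes between two consecutive iterates differ by at most the threshold, and the midpoint between the smallest and largest change is reported as the value estimate. A minimal standalone version (for double values, without the early abort of the member function above, assuming non-empty vectors of equal size) could look as follows:

    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Returns {converged, estimate}: converged iff the span of entry-wise changes is within `threshold`.
    std::pair<bool, double> spanConvergence(std::vector<double> const& previous, std::vector<double> const& current, double threshold, bool relative) {
        double maxDiff = current[0] - previous[0];
        double minDiff = maxDiff;
        for (std::size_t i = 1; i < current.size(); ++i) {
            double diff = current[i] - previous[i];
            maxDiff = std::max(maxDiff, diff);
            minDiff = std::min(minDiff, diff);
        }
        bool converged = (maxDiff - minDiff) <= (relative ? threshold * minDiff : threshold);
        // The midpoint of the span is the current estimate of the long-run average value.
        return {converged, (maxDiff + minDiff) / 2.0};
    }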