diff --git a/src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.cpp b/src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.cpp
index 92df53597..307f4d889 100644
--- a/src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.cpp
+++ b/src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.cpp
@@ -29,9 +29,9 @@ namespace storm {
 
         template <typename ValueType>
         std::vector<ValueType> SparseSmgLraHelper<ValueType>::computeLongRunAverageRewardsSound(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel) {
-            STORM_LOG_DEBUG("Transition Matrix:\n" << _transitionMatrix);
+            // STORM_LOG_DEBUG("Transition Matrix:\n" << _transitionMatrix);
             std::vector<ValueType> result;
-            std::vector<ValueType> stateRewardsGetter;
+            std::vector<ValueType> stateRewardsGetter = std::vector<ValueType>(_transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());
             if (rewardModel.hasStateRewards()) {
                 stateRewardsGetter = rewardModel.getStateRewardVector();
             }
@@ -45,60 +45,104 @@ namespace storm {
             } else {
                 actionRewardsGetter = [] (uint64_t) { return storm::utility::zero<ValueType>(); };
             }
-            STORM_LOG_DEBUG("rewards: " << rewardModel.getStateRewardVector());
+            std::vector<ValueType> b = getBVector(stateRewardsGetter, actionRewardsGetter);
+
+            // If requested, allocate memory for the choices made
+            if (this->_produceScheduler) {
+                if (!this->_producedOptimalChoices.is_initialized()) {
+                    _producedOptimalChoices.emplace();
+                }
+                _producedOptimalChoices->resize(_transitionMatrix.getRowGroupCount());
+            }
 
             prepareMultiplier(env, rewardModel);
-            performValueIteration(env, rewardModel, stateRewardsGetter, actionRewardsGetter, result);
+            performValueIteration(env, rewardModel, b, actionRewardsGetter, result);
 
             return result;
         }
 
         template <typename ValueType>
-        void SparseSmgLraHelper<ValueType>::performValueIteration(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel, std::vector<ValueType> const& stateValueGetter, ValueGetter const& actionValueGetter, std::vector<ValueType>& result, std::vector<uint64_t>* choices, std::vector<ValueType>* choiceValues)
+        std::vector<ValueType> SparseSmgLraHelper<ValueType>::getBVector(std::vector<ValueType> const& stateRewardsGetter, ValueGetter const& actionRewardsGetter) {
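+            // b has one entry per matrix row (i.e. per choice): the reward of the owning
+            // state plus that choice's action reward, laid out row group by row group.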
+            std::vector<ValueType> b = std::vector<ValueType>(_transitionMatrix.getRowCount());
+            size_t globalChoiceCount = 0;
+            auto rowGroupIndices = _transitionMatrix.getRowGroupIndices();
+            for (size_t state = 0; state < _transitionMatrix.getRowGroupCount(); state++) {
+                size_t rowGroupSize = rowGroupIndices[state + 1] - rowGroupIndices[state];
+                for (size_t choice = 0; choice < rowGroupSize; choice++, globalChoiceCount++)
+                {
+                    b[globalChoiceCount] = stateRewardsGetter[state] + actionRewardsGetter(globalChoiceCount);
+                }
+            }
+            return b;
+        }
+
+        template <typename ValueType>
+        void SparseSmgLraHelper<ValueType>::performValueIteration(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel, std::vector<ValueType> const& b, ValueGetter const& actionValueGetter, std::vector<ValueType>& result, std::vector<uint64_t>* choices, std::vector<ValueType>* choiceValues)
         {
             std::vector<uint64_t> choicesForStrategies = std::vector<uint64_t>(_transitionMatrix.getRowGroupCount(), 0);
-            ValueType precision = storm::utility::convertNumber<ValueType>(env.solver().game().getPrecision());
+            auto precision = storm::utility::convertNumber<ValueType>(env.solver().lra().getPrecision());
+            Environment envMinMax = env;
+            envMinMax.solver().lra().setPrecision(precision / 2.0);
+            STORM_LOG_DEBUG(envMinMax.solver().lra().getPrecision());
+            size_t iteration_count = 0;
 
             do
             {
-                _x1IsCurrent = !_x1IsCurrent;
                 // Convergent recommender procedure
 
-                _multiplier->multiplyAndReduce(env, _optimizationDirection, xOld(), nullptr, xNew(), &choicesForStrategies, &_statesOfCoalition);
-                for (size_t i = 0; i < xNew().size(); i++)
-                {
-                    xNew()[i] = xNew()[i] + stateValueGetter[i];
-                }
+                _multiplier->multiplyAndReduce(env, _optimizationDirection, xNew(), &b, xNew(), &choicesForStrategies, &_statesOfCoalition);
 
+                if (iteration_count % 5 == 0) { // only every 5th iteration
                 storm::storage::BitVector fixedMaxStrat = getStrategyFixedBitVec(choicesForStrategies, MinMaxStrategy::MaxStrategy);
                 storm::storage::BitVector fixedMinStrat = getStrategyFixedBitVec(choicesForStrategies, MinMaxStrategy::MinStrategy);
-                storm::storage::SparseMatrix<ValueType> restrictedMaxMatrix = _transitionMatrix.restrictRows(fixedMaxStrat);
-
-                storm::storage::SparseMatrix<ValueType> restrictedMinMatrix = _transitionMatrix.restrictRows(fixedMinStrat);
 
                 // compute bounds
-                storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> MaxSolver(restrictedMaxMatrix);
-                MaxSolver.setOptimizationDirection(OptimizationDirection::Minimize);
-                std::vector<ValueType> resultForMax = MaxSolver.computeLongRunAverageRewards(env, rewardModel);
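+                    // Strategy-fixing step: restricting the game to one player's current
+                    // strategy leaves an MDP for the opponent, whose optimal LRA value is a
+                    // sound bound (max-fixed gives the lower bound xL, min-fixed the upper
+                    // bound xU). A bound is only recomputed when its candidate strategy changed.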
+                    if (fixedMaxStrat != _fixedMaxStrat) {
+                        storm::storage::SparseMatrix<ValueType> restrictedMaxMatrix = _transitionMatrix.restrictRows(fixedMaxStrat);
+
+                        storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> MaxSolver(restrictedMaxMatrix);
+                        MaxSolver.setOptimizationDirection(OptimizationDirection::Minimize);
+                        _resultForMax = MaxSolver.computeLongRunAverageRewards(envMinMax, rewardModel);
+                        STORM_LOG_DEBUG("resultMax: " << _resultForMax);
+                        _fixedMaxStrat = fixedMaxStrat;
+
+                        for (size_t i = 0; i < xNewL().size(); i++) {
+                            xNewL()[i] = std::max(xNewL()[i], _resultForMax[i]);
+                        }
+                    }
 
-                for (size_t i = 0; i < xNewL().size(); i++)
-                {
-                    xNewL()[i] = std::max(xOldL()[i], resultForMax[i]);
-                }
+                    if (fixedMinStrat != _fixedMinStrat) {
+                        storm::storage::SparseMatrix<ValueType> restrictedMinMatrix = _transitionMatrix.restrictRows(fixedMinStrat);
 
-                storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> MinSolver(restrictedMinMatrix);
-                MinSolver.setOptimizationDirection(OptimizationDirection::Maximize);
-                std::vector<ValueType> resultForMin = MinSolver.computeLongRunAverageRewards(env, rewardModel);
+                        storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> MinSolver(restrictedMinMatrix);
+                        MinSolver.setOptimizationDirection(OptimizationDirection::Maximize);
+                        _resultForMin = MinSolver.computeLongRunAverageRewards(envMinMax, rewardModel);
+                        STORM_LOG_DEBUG("resultMin: " << _resultForMin);
+                        _fixedMinStrat = fixedMinStrat;
 
-                for (size_t i = 0; i < xNewU().size(); i++)
-                {
-                    xNewU()[i] = std::min(xOldU()[i], resultForMin[i]);
+                        for (size_t i = 0; i < xNewU().size(); i++) {
+                            xNewU()[i] = std::min(xNewU()[i], _resultForMin[i]);
+                        }
+                    }
                 }
 
                 STORM_LOG_DEBUG("xL " << xNewL());
                 STORM_LOG_DEBUG("xU " << xNewU());
+                iteration_count++;
 
             } while (!checkConvergence(precision));
-            result = xNewU();
+
+            if (_produceScheduler) {
+                _multiplier->multiplyAndReduce(env, _optimizationDirection, xNew(), &b, xNew(), &_producedOptimalChoices.get(), &_statesOfCoalition);
+            }
+
+            if (_produceChoiceValues) {
+                if (!this->_choiceValues.is_initialized()) {
+                    this->_choiceValues.emplace();
+                }
+                this->_choiceValues->resize(this->_transitionMatrix.getRowCount());
+                _choiceValues = calcChoiceValues(envMinMax, rewardModel);
+            }
+            result = xNewL();
         }
 
@@ -108,7 +152,7 @@ namespace storm {
             auto rowGroupIndices = this->_transitionMatrix.getRowGroupIndices();
             STORM_LOG_DEBUG("choices " << choices);
 
-            for(uint state = 0; state < rowGroupIndices.size() - 1; state++) {
+            for(uint state = 0; state < _transitionMatrix.getRowGroupCount(); state++) {
                 if ((_minimizerStates[state] && strategy == MinMaxStrategy::MaxStrategy) || (!_minimizerStates[state] && strategy == MinMaxStrategy::MinStrategy))
                     continue;
@@ -122,6 +166,75 @@ namespace storm {
             return restrictBy;
         }
 
+        template <typename ValueType>
+        std::vector<ValueType> SparseSmgLraHelper<ValueType>::calcChoiceValues(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel) {
+            std::vector<ValueType> choiceValues(_transitionMatrix.getRowCount());
+
+            storm::storage::SparseMatrix<ValueType> restrictedMaxMatrix = _transitionMatrix.restrictRows(_fixedMaxStrat);
+            storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> MaxSolver(restrictedMaxMatrix);
+            MaxSolver.setOptimizationDirection(OptimizationDirection::Minimize);
+            MaxSolver.setProduceChoiceValues(true);
+            MaxSolver.computeLongRunAverageRewards(env, rewardModel);
+            std::vector<ValueType> minimizerChoices = MaxSolver.getChoiceValues();
+
+            storm::storage::SparseMatrix<ValueType> restrictedMinMatrix = _transitionMatrix.restrictRows(_fixedMinStrat);
+            storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> MinSolver(restrictedMinMatrix);
+            MinSolver.setOptimizationDirection(OptimizationDirection::Maximize);
+            MinSolver.setProduceChoiceValues(true);
+            MinSolver.computeLongRunAverageRewards(env, rewardModel);
+            std::vector<ValueType> maximizerChoices = MinSolver.getChoiceValues();
+
+            auto rowGroupIndices = this->_transitionMatrix.getRowGroupIndices();
+
+            auto minIt = minimizerChoices.begin();
+            auto maxIt = maximizerChoices.begin();
+            size_t globalCounter = 0;
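+            // Interleave the two restricted solutions: a minimizer state keeps its full row
+            // group in the max-restricted matrix, so its values come from minimizerChoices and
+            // its single fixed row in the min-restricted matrix is skipped, and vice versa.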
+            for(uint state = 0; state < _transitionMatrix.getRowGroupCount(); state++) {
+                uint rowGroupSize = rowGroupIndices[state + 1] - rowGroupIndices[state];
+                for(uint rowGroupIndex = 0; rowGroupIndex < rowGroupSize; rowGroupIndex++) {
+                    if (_minimizerStates[state]) {
+                        choiceValues[globalCounter] = *minIt;
+                        minIt++;
+                    }
+                    else {
+                        choiceValues[globalCounter] = *maxIt;
+                        maxIt++;
+                    }
+                    globalCounter++;
+                }
+                if (_minimizerStates[state]) {
+                    maxIt++;
+                }
+                else {
+                    minIt++;
+                }
+            }
+            return choiceValues;
+        }
+
+        template <typename ValueType>
+        std::vector<ValueType> SparseSmgLraHelper<ValueType>::getChoiceValues() const {
+            STORM_LOG_ASSERT(_produceChoiceValues, "Trying to get the computed choice values although this was not requested.");
+            STORM_LOG_ASSERT(this->_choiceValues.is_initialized(), "Trying to get the computed choice values but none were available. Was there a computation call before?");
+            return this->_choiceValues.get();
+        }
+
+        template <typename ValueType>
+        storm::storage::Scheduler<ValueType> SparseSmgLraHelper<ValueType>::extractScheduler() const {
+            auto const& optimalChoices = getProducedOptimalChoices();
+            storm::storage::Scheduler<ValueType> scheduler(optimalChoices.size());
+            for (uint64_t state = 0; state < optimalChoices.size(); ++state) {
+                scheduler.setChoice(optimalChoices[state], state);
+            }
+            return scheduler;
+        }
+
+        template <typename ValueType>
+        std::vector<uint64_t> const& SparseSmgLraHelper<ValueType>::getProducedOptimalChoices() const {
+            STORM_LOG_ASSERT(_produceScheduler, "Trying to get the produced optimal choices although no scheduler was requested.");
+            STORM_LOG_ASSERT(this->_producedOptimalChoices.is_initialized(), "Trying to get the produced optimal choices but none were available. Was there a computation call before?");
+            return this->_producedOptimalChoices.get();
+        }
 
         template <typename ValueType>
         void SparseSmgLraHelper<ValueType>::prepareMultiplier(const Environment& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel)
@@ -134,6 +247,12 @@ namespace storm {
             _x1 = _x1L;
             _x2 = _x1;
 
+            _fixedMaxStrat = storm::storage::BitVector(_transitionMatrix.getRowCount(), false);
+            _fixedMinStrat = storm::storage::BitVector(_transitionMatrix.getRowCount(), false);
+
+            _resultForMin = std::vector<ValueType>(_transitionMatrix.getRowGroupCount());
+            _resultForMax = std::vector<ValueType>(_transitionMatrix.getRowGroupCount());
+
             _x1U = std::vector<ValueType>(_transitionMatrix.getRowGroupCount(), std::numeric_limits<ValueType>::infinity());
             _x2U = _x1U;
         }
@@ -146,22 +265,9 @@ namespace storm {
             auto x1It = xNewL().begin();
             auto x1Ite = xNewL().end();
             auto x2It = xNewU().begin();
-            ValueType maxDiff = ((*x2It) - (*x1It));
-            ValueType minDiff = maxDiff;
-            // The difference between maxDiff and minDiff is zero at this point. Thus, it doesn't make sense to check the threshold now.
-            for (++x1It, ++x2It; x1It != x1Ite; ++x1It, ++x2It) {
+            for (; x1It != x1Ite; x1It++, x2It++) {
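+                // Sound stopping criterion: the gap between upper and lower value
+                // must be below the threshold in every state.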
                 ValueType diff = (*x2It - *x1It);
-                // Potentially update maxDiff or minDiff
-                bool skipCheck = false;
-                if (maxDiff < diff) {
-                    maxDiff = diff;
-                } else if (minDiff > diff) {
-                    minDiff = diff;
-                } else {
-                    skipCheck = true;
-                }
-                // Check convergence
-                if (!skipCheck && (maxDiff - minDiff) > threshold) {
+                if (diff > threshold) {
                     return false;
                 }
             }
diff --git a/src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.h b/src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.h
index f4bb609e0..d9be65770 100644
--- a/src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.h
+++ b/src/storm/modelchecker/helper/infinitehorizon/internal/SparseSmgLraHelper.h
@@ -24,7 +24,7 @@ namespace storm {
         template <typename ValueType>
         class SparseSmgLraHelper {
         public:
-            /// Function mapping from indices to values
+            // Function mapping from indices to values
             typedef std::function<ValueType(uint64_t)> ValueGetter;
 
             SparseSmgLraHelper(storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::BitVector const statesOfCoalition);
@@ -42,7 +42,11 @@ namespace storm {
              */
             void performValueIteration(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel, std::vector<ValueType> const& stateValueGetter, ValueGetter const& actionValueGetter, std::vector<ValueType>& result, std::vector<uint64_t>* choices = nullptr, std::vector<ValueType>* choiceValues = nullptr);
 
+            std::vector<ValueType> getChoiceValues() const;
 
+            storm::storage::Scheduler<ValueType> extractScheduler() const;
+
+            std::vector<uint64_t> const& getProducedOptimalChoices() const;
 
             void prepareMultiplier(const Environment& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel);
 
@@ -91,6 +95,10 @@ namespace storm {
 
             storm::storage::BitVector getStrategyFixedBitVec(std::vector<uint64_t> const& choices, MinMaxStrategy strategy);
 
+            std::vector<ValueType> getBVector(std::vector<ValueType> const& stateRewardsGetter, ValueGetter const& actionRewardsGetter);
+
+            std::vector<ValueType> calcChoiceValues(Environment const& env, storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel);
+
             /*!
              * Must be called between two calls of performIterationStep.
              */
@@ -128,9 +136,14 @@ namespace storm {
             storm::storage::BitVector const _statesOfCoalition;
             ValueType _strategyVIPrecision;
 
-            storm::storage::BitVector _relevantStates;
             storm::storage::BitVector _minimizerStates;
+
+            storm::storage::BitVector _fixedMinStrat;
+            storm::storage::BitVector _fixedMaxStrat;
+
+            std::vector<ValueType> _resultForMax;
+            std::vector<ValueType> _resultForMin;
+
             boost::optional<std::pair<storm::logic::ComparisonType, ValueType>> _valueThreshold;
             storm::solver::OptimizationDirection _optimizationDirection;
             bool _produceScheduler;
@@ -147,6 +160,9 @@ namespace storm {
             std::unique_ptr<storm::solver::MinMaxLinearEquationSolver<ValueType>> _Solver;
             std::unique_ptr<storm::solver::LinearEquationSolver<ValueType>> _DetIsSolver;
             std::unique_ptr<storm::Environment> _IsSolverEnv;
+
+            boost::optional<std::vector<uint64_t>> _producedOptimalChoices;
+            boost::optional<std::vector<ValueType>> _choiceValues;
         };
     }
 }