Browse Source

implemented VI based Long-run-average method for MDPs

tempestpy_adaptions
TimQu 7 years ago
parent
commit
5b10b027fc
  1. 146
      src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.cpp
  2. 6
      src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h

146
src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.cpp

@ -1,4 +1,7 @@
#include "storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h"
#include "storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h"
#include <boost/container/flat_map.hpp>
#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
#include "storm/modelchecker/hints/ExplicitModelCheckerHint.h"
@ -18,6 +21,9 @@
#include "storm/solver/MinMaxLinearEquationSolver.h"
#include "storm/solver/LpSolver.h"
#include "storm/settings/SettingsManager.h"
#include "storm/settings/modules/MinMaxEquationSolverSettings.h"
#include "storm/exceptions/InvalidStateException.h"
#include "storm/exceptions/InvalidPropertyException.h"
#include "storm/exceptions/InvalidSettingsException.h"
@ -543,7 +549,7 @@ namespace storm {
for (uint_fast64_t currentMecIndex = 0; currentMecIndex < mecDecomposition.size(); ++currentMecIndex) {
storm::storage::MaximalEndComponent const& mec = mecDecomposition[currentMecIndex];
lraValuesForEndComponents[currentMecIndex] = computeLraForMaximalEndComponent(dir, transitionMatrix, rewardModel, mec);
lraValuesForEndComponents[currentMecIndex] = computeLraForMaximalEndComponent(dir, transitionMatrix, rewardModel, mec, minMaxLinearEquationSolverFactory);
// Gather information for later use.
for (auto const& stateChoicesPair : mec) {
@ -668,7 +674,133 @@ namespace storm {
template<typename ValueType>
template<typename RewardModelType>
ValueType SparseMdpPrctlHelper<ValueType>::computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec) {
ValueType SparseMdpPrctlHelper<ValueType>::computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<ValueType> const& minMaxLinearEquationSolverFactory) {
// If the mec only consists of a single state, we compute the LRA value directly
if (++mec.begin() == mec.end()) {
uint64_t state = mec.begin()->first;
auto choiceIt = mec.begin()->second.begin();
ValueType result = rewardModel.getTotalStateActionReward(state, *choiceIt, transitionMatrix);
for (++choiceIt; choiceIt != mec.begin()->second.end(); ++choiceIt) {
if (storm::solver::minimize(dir)) {
result = std::min(result, rewardModel.getTotalStateActionReward(state, *choiceIt, transitionMatrix));
} else {
result = std::max(result, rewardModel.getTotalStateActionReward(state, *choiceIt, transitionMatrix));
}
}
return result;
}
// Solve MEC with the method specified in the settings
storm::solver::MinMaxMethod method = storm::settings::getModule<storm::settings::modules::MinMaxEquationSolverSettings>().getMinMaxEquationSolvingMethod();
if (method == storm::solver::MinMaxMethod::LinearProgramming) {
return computeLraForMaximalEndComponentLP(dir, transitionMatrix, rewardModel, mec);
} else if (method == storm::solver::MinMaxMethod::ValueIteration) {
return computeLraForMaximalEndComponentVI(dir, transitionMatrix, rewardModel, mec, minMaxLinearEquationSolverFactory);
} else {
STORM_LOG_THROW(false, storm::exceptions::InvalidSettingsException, "Unsupported technique.");
}
}
template<typename ValueType>
template<typename RewardModelType>
ValueType SparseMdpPrctlHelper<ValueType>::computeLraForMaximalEndComponentVI(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<ValueType> const& minMaxLinearEquationSolverFactory) {
// Initialize data about the mec
storm::storage::BitVector mecStates(transitionMatrix.getRowGroupCount(), false);
storm::storage::BitVector mecChoices(transitionMatrix.getRowCount(), false);
for (auto const& stateChoicesPair : mec) {
mecStates.set(stateChoicesPair.first);
for (auto const& choice : stateChoicesPair.second) {
mecChoices.set(choice);
}
}
boost::container::flat_map<uint64_t, uint64_t> toSubModelStateMapping;
uint64_t currState = 0;
toSubModelStateMapping.reserve(mecStates.getNumberOfSetBits());
for (auto const& mecState : mecStates) {
toSubModelStateMapping.insert(std::pair<uint64_t, uint64_t>(mecState, currState));
++currState;
}
// Get a transition matrix that only considers the states and choices within the MEC
storm::storage::SparseMatrixBuilder<ValueType> mecTransitionBuilder(mecChoices.getNumberOfSetBits(), mecStates.getNumberOfSetBits(), 0, true, true, mecStates.getNumberOfSetBits());
std::vector<ValueType> choiceRewards;
choiceRewards.reserve(mecChoices.getNumberOfSetBits());
uint64_t currRow = 0;
ValueType selfLoopProb = storm::utility::convertNumber<ValueType>(0.1); // todo try other values
ValueType scalingFactor = storm::utility::one<ValueType>() - selfLoopProb;
for (auto const& mecState : mecStates) {
mecTransitionBuilder.newRowGroup(currRow);
uint64_t groupStart = transitionMatrix.getRowGroupIndices()[mecState];
uint64_t groupEnd = transitionMatrix.getRowGroupIndices()[mecState + 1];
for (uint64_t choice = mecChoices.getNextSetIndex(groupStart); choice < groupEnd; choice = mecChoices.getNextSetIndex(choice + 1)) {
bool insertedDiagElement = false;
for (auto const& entry : transitionMatrix.getRow(choice)) {
uint64_t column = toSubModelStateMapping[entry.getColumn()];
if (!insertedDiagElement && entry.getColumn() > mecState) {
mecTransitionBuilder.addNextValue(currRow, toSubModelStateMapping[mecState], selfLoopProb);
insertedDiagElement = true;
}
if (!insertedDiagElement && entry.getColumn() == mecState) {
mecTransitionBuilder.addNextValue(currRow, column, selfLoopProb + scalingFactor * entry.getValue());
insertedDiagElement = true;
} else {
mecTransitionBuilder.addNextValue(currRow, column, scalingFactor * entry.getValue());
}
}
if (!insertedDiagElement) {
mecTransitionBuilder.addNextValue(currRow, toSubModelStateMapping[mecState], selfLoopProb);
}
// Compute the rewards obtained for this choice
choiceRewards.push_back(scalingFactor * rewardModel.getTotalStateActionReward(mecState, choice, transitionMatrix));
++currRow;
}
}
auto mecTransitions = mecTransitionBuilder.build();
STORM_LOG_ASSERT(mecTransitions.isProbabilistic(), "The MEC-Matrix is not probabilistic.");
// start the iterations
ValueType precision = storm::utility::convertNumber<ValueType>(storm::settings::getModule<storm::settings::modules::MinMaxEquationSolverSettings>().getPrecision());
std::vector<ValueType> x(mecTransitions.getRowGroupCount(), storm::utility::zero<ValueType>());
std::vector<ValueType> xPrime = x;
auto solver = minMaxLinearEquationSolverFactory.create(std::move(mecTransitions));
solver->setCachingEnabled(true);
ValueType maxDiff, minDiff;
while (true) {
// Compute the obtained rewards for the next step
solver->repeatedMultiply(dir, x, &choiceRewards, 1);
// update xPrime and check for convergence
// to avoid large (and numerically unstable) x-values, we substract a reference value.
auto xIt = x.begin();
auto xPrimeIt = xPrime.begin();
ValueType refVal = *xIt;
maxDiff = *xIt - *xPrimeIt;
minDiff = maxDiff;
*xIt -= refVal;
*xPrimeIt = *xIt;
for (++xIt, ++xPrimeIt; xIt != x.end(); ++xIt, ++xPrimeIt) {
ValueType diff = *xIt - *xPrimeIt;
maxDiff = std::max(maxDiff, diff);
minDiff = std::min(minDiff, diff);
*xIt -= refVal;
*xPrimeIt = *xIt;
}
if ((maxDiff - minDiff) < precision) {
break;
}
}
return (maxDiff + minDiff) / (storm::utility::convertNumber<ValueType>(2.0) * scalingFactor);
}
template<typename ValueType>
template<typename RewardModelType>
ValueType SparseMdpPrctlHelper<ValueType>::computeLraForMaximalEndComponentLP(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec) {
std::shared_ptr<storm::solver::LpSolver> solver = storm::utility::solver::getLpSolver("LRA for MEC");
solver->setOptimizationDirection(invert(dir));
@ -817,7 +949,9 @@ namespace storm {
template MDPSparseModelCheckingHelperReturnType<double> SparseMdpPrctlHelper<double>::computeReachabilityRewards(OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::storage::SparseMatrix<double> const& backwardTransitions, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, storm::solver::MinMaxLinearEquationSolverFactory<double> const& minMaxLinearEquationSolverFactory, ModelCheckerHint const& hint);
template MDPSparseModelCheckingHelperReturnType<double> SparseMdpPrctlHelper<double>::computeReachabilityRewards(storm::solver::SolveGoal const& goal, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::storage::SparseMatrix<double> const& backwardTransitions, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, storm::solver::MinMaxLinearEquationSolverFactory<double> const& minMaxLinearEquationSolverFactory, ModelCheckerHint const& hint);
template std::vector<double> SparseMdpPrctlHelper<double>::computeLongRunAverageRewards(OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::storage::SparseMatrix<double> const& backwardTransitions, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::solver::MinMaxLinearEquationSolverFactory<double> const& minMaxLinearEquationSolverFactory);
template double SparseMdpPrctlHelper<double>::computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::MaximalEndComponent const& mec);
template double SparseMdpPrctlHelper<double>::computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<double> const& minMaxLinearEquationSolverFactory);
template double SparseMdpPrctlHelper<double>::computeLraForMaximalEndComponentVI(OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<double> const& minMaxLinearEquationSolverFactory);
template double SparseMdpPrctlHelper<double>::computeLraForMaximalEndComponentLP(OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::MaximalEndComponent const& mec);
#ifdef STORM_HAVE_CARL
template class SparseMdpPrctlHelper<storm::RationalNumber>;
@ -826,7 +960,9 @@ namespace storm {
template MDPSparseModelCheckingHelperReturnType<storm::RationalNumber> SparseMdpPrctlHelper<storm::RationalNumber>::computeReachabilityRewards(OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::storage::SparseMatrix<storm::RationalNumber> const& backwardTransitions, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, storm::solver::MinMaxLinearEquationSolverFactory<storm::RationalNumber> const& minMaxLinearEquationSolverFactory, ModelCheckerHint const& hint);
template MDPSparseModelCheckingHelperReturnType<storm::RationalNumber> SparseMdpPrctlHelper<storm::RationalNumber>::computeReachabilityRewards(storm::solver::SolveGoal const& goal, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::storage::SparseMatrix<storm::RationalNumber> const& backwardTransitions, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, storm::solver::MinMaxLinearEquationSolverFactory<storm::RationalNumber> const& minMaxLinearEquationSolverFactory, ModelCheckerHint const& hint);
template std::vector<storm::RationalNumber> SparseMdpPrctlHelper<storm::RationalNumber>::computeLongRunAverageRewards(OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::storage::SparseMatrix<storm::RationalNumber> const& backwardTransitions, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::solver::MinMaxLinearEquationSolverFactory<storm::RationalNumber> const& minMaxLinearEquationSolverFactory);
template storm::RationalNumber SparseMdpPrctlHelper<storm::RationalNumber>::computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::MaximalEndComponent const& mec);
template storm::RationalNumber SparseMdpPrctlHelper<storm::RationalNumber>::computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<storm::RationalNumber> const& minMaxLinearEquationSolverFactory);
template storm::RationalNumber SparseMdpPrctlHelper<storm::RationalNumber>::computeLraForMaximalEndComponentVI(OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<storm::RationalNumber> const& minMaxLinearEquationSolverFactory);
template storm::RationalNumber SparseMdpPrctlHelper<storm::RationalNumber>::computeLraForMaximalEndComponentLP(OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::MaximalEndComponent const& mec);
#endif
}

6
src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h

@ -75,7 +75,11 @@ namespace storm {
static MDPSparseModelCheckingHelperReturnType<ValueType> computeReachabilityRewardsHelper(storm::solver::SolveGoal const& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, std::function<std::vector<ValueType>(uint_fast64_t, storm::storage::SparseMatrix<ValueType> const&, storm::storage::BitVector const&)> const& totalStateRewardVectorGetter, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, storm::solver::MinMaxLinearEquationSolverFactory<ValueType> const& minMaxLinearEquationSolverFactory, ModelCheckerHint const& hint = ModelCheckerHint());
template<typename RewardModelType>
static ValueType computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec);
static ValueType computeLraForMaximalEndComponent(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<ValueType> const& minMaxLinearEquationSolverFactory);
template<typename RewardModelType>
static ValueType computeLraForMaximalEndComponentVI(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec, storm::solver::MinMaxLinearEquationSolverFactory<ValueType> const& minMaxLinearEquationSolverFactory);
template<typename RewardModelType>
static ValueType computeLraForMaximalEndComponentLP(OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec);
};

Loading…
Cancel
Save