
Dropping old MDP LRA code.

tempestpy_adaptions
Tim Quatmann, 4 years ago
parent commit d06a39eb79
  1. src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.cpp (467 changed lines)
  2. src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h (13 changed lines)

src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.cpp (467 changed lines)

@@ -39,7 +39,6 @@
#include "storm/transformer/EndComponentEliminator.h"
#include "storm/environment/solver/MinMaxSolverEnvironment.h"
#include "storm/environment/solver/LongRunAverageSolverEnvironment.h"
#include "storm/exceptions/InvalidStateException.h"
#include "storm/exceptions/InvalidPropertyException.h"
@@ -1208,464 +1207,6 @@ namespace storm {
return MDPSparseModelCheckingHelperReturnType<ValueType>(std::move(result), std::move(scheduler));
}
template<typename ValueType>
MDPSparseModelCheckingHelperReturnType<ValueType> SparseMdpPrctlHelper<ValueType>::computeLongRunAverageProbabilities(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, storm::storage::BitVector const& psiStates, bool produceScheduler) {
// If there are no goal states, we avoid the computation and directly return zero.
if (psiStates.empty()) {
return std::vector<ValueType>(transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());
}
// Likewise, if all bits are set, we can avoid the computation and directly return one.
if (psiStates.full()) {
return std::vector<ValueType>(transitionMatrix.getRowGroupCount(), storm::utility::one<ValueType>());
}
// Reduce long run average probabilities to long run average rewards by
// building a reward model assigning one reward to every psi state
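// With this reward model, the long-run average reward coincides with the long-run average fraction of time spent in psi states.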
std::vector<ValueType> stateRewards(psiStates.size(), storm::utility::zero<ValueType>());
storm::utility::vector::setVectorValues(stateRewards, psiStates, storm::utility::one<ValueType>());
storm::models::sparse::StandardRewardModel<ValueType> rewardModel(std::move(stateRewards));
return computeLongRunAverageRewards(env, std::move(goal), transitionMatrix, backwardTransitions, rewardModel, produceScheduler);
}
template<typename ValueType>
template<typename RewardModelType>
MDPSparseModelCheckingHelperReturnType<ValueType> SparseMdpPrctlHelper<ValueType>::computeLongRunAverageRewards(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, RewardModelType const& rewardModel, bool produceScheduler) {
uint64_t numberOfStates = transitionMatrix.getRowGroupCount();
std::unique_ptr<storm::storage::Scheduler<ValueType>> scheduler;
if (produceScheduler) {
scheduler = std::make_unique<storm::storage::Scheduler<ValueType>>(numberOfStates);
}
// Start by decomposing the MDP into its MECs.
storm::storage::MaximalEndComponentDecomposition<ValueType> mecDecomposition(transitionMatrix, backwardTransitions);
// Get some data members for convenience.
std::vector<uint_fast64_t> const& nondeterministicChoiceIndices = transitionMatrix.getRowGroupIndices();
ValueType zero = storm::utility::zero<ValueType>();
// First, calculate the LRA values for the maximal end components.
storm::storage::BitVector statesInMecs(numberOfStates);
std::vector<uint_fast64_t> stateToMecIndexMap(transitionMatrix.getColumnCount());
std::vector<ValueType> lraValuesForEndComponents(mecDecomposition.size(), zero);
auto underlyingSolverEnvironment = env;
if (env.solver().isForceSoundness()) {
// For sound computations, the error in the MECs plus the error in the remaining system should be less than the user-defined precision.
underlyingSolverEnvironment.solver().lra().setPrecision(env.solver().lra().getPrecision() / storm::utility::convertNumber<storm::RationalNumber>(2));
underlyingSolverEnvironment.solver().minMax().setPrecision(env.solver().lra().getPrecision() / storm::utility::convertNumber<storm::RationalNumber>(2));
underlyingSolverEnvironment.solver().minMax().setRelativeTerminationCriterion(env.solver().lra().getRelativeTerminationCriterion());
}
for (uint_fast64_t currentMecIndex = 0; currentMecIndex < mecDecomposition.size(); ++currentMecIndex) {
storm::storage::MaximalEndComponent const& mec = mecDecomposition[currentMecIndex];
lraValuesForEndComponents[currentMecIndex] = computeLraForMaximalEndComponent(underlyingSolverEnvironment, goal.direction(), transitionMatrix, rewardModel, mec, scheduler);
// Gather information for later use.
for (auto const& stateChoicesPair : mec) {
statesInMecs.set(stateChoicesPair.first);
stateToMecIndexMap[stateChoicesPair.first] = currentMecIndex;
}
}
// For fast transition rewriting, we build some auxiliary data structures.
storm::storage::BitVector statesNotContainedInAnyMec = ~statesInMecs;
uint_fast64_t firstAuxiliaryStateIndex = statesNotContainedInAnyMec.getNumberOfSetBits();
uint_fast64_t lastStateNotInMecs = 0;
uint_fast64_t numberOfStatesNotInMecs = 0;
std::vector<uint_fast64_t> statesNotInMecsBeforeIndex;
statesNotInMecsBeforeIndex.reserve(numberOfStates);
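// statesNotInMecsBeforeIndex[s] will hold the number of non-MEC states with an index smaller than s, i.e., the
// index of s within the SSP model if s itself is not contained in any MEC.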
for (auto state : statesNotContainedInAnyMec) {
while (lastStateNotInMecs <= state) {
statesNotInMecsBeforeIndex.push_back(numberOfStatesNotInMecs);
++lastStateNotInMecs;
}
++numberOfStatesNotInMecs;
}
// Finally, we are ready to create the SSP matrix and right-hand side of the SSP.
std::vector<ValueType> b;
uint64_t numberOfSspStates = numberOfStatesNotInMecs + mecDecomposition.size();
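// The SSP (stochastic shortest path) model contains one state for every state that is not part of a MEC plus one
// auxiliary state per MEC. States outside of MECs keep their original choices, with probabilities leading into a MEC
// redirected to the corresponding auxiliary state. Each auxiliary state receives all choices of its MEC's states that
// do not belong to the MEC itself, plus one additional choice that 'stays' in the MEC and collects its LRA value (via b).
// Solving the resulting min/max equation system then yields the LRA value for every original state.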
typename storm::storage::SparseMatrixBuilder<ValueType> sspMatrixBuilder(0, numberOfSspStates, 0, false, true, numberOfSspStates);
// If the source state is not contained in any MEC, we copy its choices (and perform the necessary modifications).
uint_fast64_t currentChoice = 0;
for (auto state : statesNotContainedInAnyMec) {
sspMatrixBuilder.newRowGroup(currentChoice);
for (uint_fast64_t choice = nondeterministicChoiceIndices[state]; choice < nondeterministicChoiceIndices[state + 1]; ++choice, ++currentChoice) {
std::vector<ValueType> auxiliaryStateToProbabilityMap(mecDecomposition.size());
b.push_back(storm::utility::zero<ValueType>());
for (auto element : transitionMatrix.getRow(choice)) {
if (statesNotContainedInAnyMec.get(element.getColumn())) {
// If the target state is not contained in an MEC, we can copy over the entry.
sspMatrixBuilder.addNextValue(currentChoice, statesNotInMecsBeforeIndex[element.getColumn()], element.getValue());
} else {
// If the target state is contained in MEC i, we need to add the probability to the corresponding field in the vector
// so that we are able to write the cumulative probability to the MEC into the matrix.
auxiliaryStateToProbabilityMap[stateToMecIndexMap[element.getColumn()]] += element.getValue();
}
}
// Now insert all (cumulative) probability values that target an MEC.
for (uint_fast64_t mecIndex = 0; mecIndex < auxiliaryStateToProbabilityMap.size(); ++mecIndex) {
if (!storm::utility::isZero(auxiliaryStateToProbabilityMap[mecIndex])) {
sspMatrixBuilder.addNextValue(currentChoice, firstAuxiliaryStateIndex + mecIndex, auxiliaryStateToProbabilityMap[mecIndex]);
}
}
}
}
std::vector<std::pair<uint_fast64_t, uint_fast64_t>> sspMecChoicesToOriginalMap; // for scheduler extraction
// Now we are ready to construct the choices for the auxiliary states.
for (uint_fast64_t mecIndex = 0; mecIndex < mecDecomposition.size(); ++mecIndex) {
storm::storage::MaximalEndComponent const& mec = mecDecomposition[mecIndex];
sspMatrixBuilder.newRowGroup(currentChoice);
for (auto const& stateChoicesPair : mec) {
uint_fast64_t state = stateChoicesPair.first;
storm::storage::FlatSet<uint_fast64_t> const& choicesInMec = stateChoicesPair.second;
for (uint_fast64_t choice = nondeterministicChoiceIndices[state]; choice < nondeterministicChoiceIndices[state + 1]; ++choice) {
// If the choice is not contained in the MEC itself, we have to add a similar distribution to the auxiliary state.
if (choicesInMec.find(choice) == choicesInMec.end()) {
std::vector<ValueType> auxiliaryStateToProbabilityMap(mecDecomposition.size());
b.push_back(storm::utility::zero<ValueType>());
for (auto element : transitionMatrix.getRow(choice)) {
if (statesNotContainedInAnyMec.get(element.getColumn())) {
// If the target state is not contained in an MEC, we can copy over the entry.
sspMatrixBuilder.addNextValue(currentChoice, statesNotInMecsBeforeIndex[element.getColumn()], element.getValue());
} else {
// If the target state is contained in MEC i, we need to add the probability to the corresponding field in the vector
// so that we are able to write the cumulative probability to the MEC into the matrix.
auxiliaryStateToProbabilityMap[stateToMecIndexMap[element.getColumn()]] += element.getValue();
}
}
// Now insert all (cumulative) probability values that target an MEC.
for (uint_fast64_t targetMecIndex = 0; targetMecIndex < auxiliaryStateToProbabilityMap.size(); ++targetMecIndex) {
if (!storm::utility::isZero(auxiliaryStateToProbabilityMap[targetMecIndex])) {
sspMatrixBuilder.addNextValue(currentChoice, firstAuxiliaryStateIndex + targetMecIndex, auxiliaryStateToProbabilityMap[targetMecIndex]);
}
}
if (produceScheduler) {
sspMecChoicesToOriginalMap.emplace_back(state, choice - nondeterministicChoiceIndices[state]);
}
++currentChoice;
}
}
}
// For each auxiliary state, there is the option to achieve the reward value of the LRA associated with the MEC.
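// This choice is encoded as an empty row whose entry in b is the MEC's LRA value, so selecting it in the SSP yields exactly that value.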
++currentChoice;
b.push_back(lraValuesForEndComponents[mecIndex]);
if (produceScheduler) {
// Insert invalid values to mark the extra 'stay in this MEC' choice.
sspMecChoicesToOriginalMap.emplace_back(std::numeric_limits<uint_fast64_t>::max(), std::numeric_limits<uint_fast64_t>::max());
}
}
// Finalize the matrix and solve the corresponding system of equations.
storm::storage::SparseMatrix<ValueType> sspMatrix = sspMatrixBuilder.build(currentChoice, numberOfSspStates, numberOfSspStates);
// Check for requirements of the solver.
storm::solver::GeneralMinMaxLinearEquationSolverFactory<ValueType> minMaxLinearEquationSolverFactory;
storm::solver::MinMaxLinearEquationSolverRequirements requirements = minMaxLinearEquationSolverFactory.getRequirements(underlyingSolverEnvironment, true, true, goal.direction(), false, produceScheduler);
requirements.clearBounds();
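// The bound requirements can be cleared because explicit lower and upper bounds (zero and the maximal LRA value over all MECs) are set on the solver below.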
STORM_LOG_THROW(!requirements.hasEnabledCriticalRequirement(), storm::exceptions::UncheckedRequirementException, "Solver requirements " + requirements.getEnabledRequirementsAsString() + " not checked.");
std::vector<ValueType> sspResult(numberOfSspStates);
goal.restrictRelevantValues(statesNotContainedInAnyMec);
std::unique_ptr<storm::solver::MinMaxLinearEquationSolver<ValueType>> solver = storm::solver::configureMinMaxLinearEquationSolver(underlyingSolverEnvironment, std::move(goal), minMaxLinearEquationSolverFactory, sspMatrix);
solver->setLowerBound(storm::utility::zero<ValueType>());
solver->setUpperBound(*std::max_element(lraValuesForEndComponents.begin(), lraValuesForEndComponents.end()));
solver->setHasUniqueSolution();
solver->setHasNoEndComponents();
solver->setTrackScheduler(produceScheduler);
solver->setRequirementsChecked();
solver->solveEquations(underlyingSolverEnvironment, sspResult, b);
// Prepare result vector.
std::vector<ValueType> result(numberOfStates, zero);
// Set the values for states not contained in MECs.
storm::utility::vector::setVectorValues(result, statesNotContainedInAnyMec, sspResult);
// Set the values for all states in MECs.
for (auto state : statesInMecs) {
result[state] = sspResult[firstAuxiliaryStateIndex + stateToMecIndexMap[state]];
}
if (produceScheduler && solver->hasScheduler()) {
// Translate the result for the SSP matrix back to the original model.
auto const& sspChoices = solver->getSchedulerChoices();
uint64_t sspState = 0;
for (auto state : statesNotContainedInAnyMec) {
scheduler->setChoice(sspChoices[sspState], state);
++sspState;
}
// The other sspStates correspond to MECs in the original system.
uint_fast64_t rowOffset = sspMatrix.getRowGroupIndices()[sspState];
for (uint_fast64_t mecIndex = 0; mecIndex < mecDecomposition.size(); ++mecIndex) {
// Obtain the state and choice of the original model to which the selected choice corresponds.
auto const& originalStateChoice = sspMecChoicesToOriginalMap[sspMatrix.getRowGroupIndices()[sspState] + sspChoices[sspState] - rowOffset];
// Check if the best choice is to stay in this MEC
if (originalStateChoice.first == std::numeric_limits<uint_fast64_t>::max()) {
STORM_LOG_ASSERT(sspMatrix.getRow(sspState, sspChoices[sspState]).getNumberOfEntries() == 0, "Expected empty row at choice that stays in MEC.");
// In this case, no further operations are necessary. The scheduler has already been set to the optimal choices during the call of computeLraForMaximalEndComponent.
} else {
// The best choice is to leave this MEC via the selected state and choice.
scheduler->setChoice(originalStateChoice.second, originalStateChoice.first);
// The remaining states in this MEC need to reach this state with probability 1.
storm::storage::BitVector exitStateAsBitVector(transitionMatrix.getRowGroupCount(), false);
exitStateAsBitVector.set(originalStateChoice.first, true);
storm::storage::BitVector otherStatesAsBitVector(transitionMatrix.getRowGroupCount(), false);
for (auto const& stateChoices : mecDecomposition[mecIndex]) {
if (stateChoices.first != originalStateChoice.first) {
otherStatesAsBitVector.set(stateChoices.first, true);
}
}
storm::utility::graph::computeSchedulerProb1E(otherStatesAsBitVector, transitionMatrix, backwardTransitions, otherStatesAsBitVector, exitStateAsBitVector, *scheduler);
}
++sspState;
}
assert(sspState == sspMatrix.getRowGroupCount());
} else {
STORM_LOG_ERROR_COND(!produceScheduler, "Requested to produce a scheduler, but no scheduler was generated.");
}
return MDPSparseModelCheckingHelperReturnType<ValueType>(std::move(result), std::move(scheduler));
}
template<typename ValueType>
template<typename RewardModelType>
ValueType SparseMdpPrctlHelper<ValueType>::computeLraForMaximalEndComponent(Environment const& env, OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec, std::unique_ptr<storm::storage::Scheduler<ValueType>>& scheduler) {
// If the MEC only consists of a single state, we compute the LRA value directly.
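// (All choices of such a MEC lead back to its single state with probability one, so the LRA value is simply the
// best total state-action reward among them, with respect to the optimization direction.)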
if (++mec.begin() == mec.end()) {
uint64_t state = mec.begin()->first;
auto choiceIt = mec.begin()->second.begin();
ValueType result = rewardModel.getTotalStateActionReward(state, *choiceIt, transitionMatrix);
uint_fast64_t bestChoice = *choiceIt;
for (++choiceIt; choiceIt != mec.begin()->second.end(); ++choiceIt) {
ValueType choiceValue = rewardModel.getTotalStateActionReward(state, *choiceIt, transitionMatrix);
if (storm::solver::minimize(dir)) {
if (result > choiceValue) {
result = std::move(choiceValue);
bestChoice = *choiceIt;
}
} else {
if (result < choiceValue) {
result = std::move(choiceValue);
bestChoice = *choiceIt;
}
}
}
if (scheduler) {
scheduler->setChoice(bestChoice - transitionMatrix.getRowGroupIndices()[state], state);
}
return result;
}
// Solve MEC with the method specified in the settings
storm::solver::LraMethod method = env.solver().lra().getNondetLraMethod();
if ((storm::NumberTraits<ValueType>::IsExact || env.solver().isForceExact()) && env.solver().lra().isNondetLraMethodSetFromDefault() && method != storm::solver::LraMethod::LinearProgramming) {
STORM_LOG_INFO("Selecting 'LP' as the solution technique for long-run properties to guarantee exact results. If you want to override this, please explicitly specify a different LRA method.");
method = storm::solver::LraMethod::LinearProgramming;
} else if (env.solver().isForceSoundness() && env.solver().lra().isNondetLraMethodSetFromDefault() && method != storm::solver::LraMethod::ValueIteration) {
STORM_LOG_INFO("Selecting 'VI' as the solution technique for long-run properties to guarantee sound results. If you want to override this, please explicitly specify a different LRA method.");
method = storm::solver::LraMethod::ValueIteration;
}
STORM_LOG_ERROR_COND(scheduler == nullptr || method == storm::solver::LraMethod::ValueIteration, "Scheduler generation not supported for the chosen LRA method. Try value-iteration.");
if (method == storm::solver::LraMethod::LinearProgramming) {
return computeLraForMaximalEndComponentLP(env, dir, transitionMatrix, rewardModel, mec);
} else if (method == storm::solver::LraMethod::ValueIteration) {
return computeLraForMaximalEndComponentVI(env, dir, transitionMatrix, rewardModel, mec, scheduler);
} else {
STORM_LOG_THROW(false, storm::exceptions::InvalidSettingsException, "Unsupported technique.");
}
}
template<typename ValueType>
template<typename RewardModelType>
ValueType SparseMdpPrctlHelper<ValueType>::computeLraForMaximalEndComponentVI(Environment const& env, OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec, std::unique_ptr<storm::storage::Scheduler<ValueType>>& scheduler) {
// Initialize data about the mec
storm::storage::BitVector mecStates(transitionMatrix.getRowGroupCount(), false);
storm::storage::BitVector mecChoices(transitionMatrix.getRowCount(), false);
for (auto const& stateChoicesPair : mec) {
mecStates.set(stateChoicesPair.first);
for (auto const& choice : stateChoicesPair.second) {
mecChoices.set(choice);
}
}
boost::container::flat_map<uint64_t, uint64_t> toSubModelStateMapping;
uint64_t currState = 0;
toSubModelStateMapping.reserve(mecStates.getNumberOfSetBits());
for (auto const& mecState : mecStates) {
toSubModelStateMapping.insert(std::pair<uint64_t, uint64_t>(mecState, currState));
++currState;
}
// Get a transition matrix that only considers the states and choices within the MEC
storm::storage::SparseMatrixBuilder<ValueType> mecTransitionBuilder(mecChoices.getNumberOfSetBits(), mecStates.getNumberOfSetBits(), 0, true, true, mecStates.getNumberOfSetBits());
std::vector<ValueType> choiceRewards;
choiceRewards.reserve(mecChoices.getNumberOfSetBits());
uint64_t currRow = 0;
ValueType selfLoopProb = storm::utility::convertNumber<ValueType>(env.solver().lra().getAperiodicFactor());
ValueType scalingFactor = storm::utility::one<ValueType>() - selfLoopProb;
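// Aperiodicity transformation: each choice receives a self-loop with probability selfLoopProb, and all original
// transition probabilities and rewards are scaled by scalingFactor = 1 - selfLoopProb. Under any scheduler the
// stationary distribution is unchanged, so the long-run average of the transformed MEC is scalingFactor times
// that of the original MEC; the division by scalingFactor in the return statement undoes this.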
for (auto const& mecState : mecStates) {
mecTransitionBuilder.newRowGroup(currRow);
uint64_t groupStart = transitionMatrix.getRowGroupIndices()[mecState];
uint64_t groupEnd = transitionMatrix.getRowGroupIndices()[mecState + 1];
for (uint64_t choice = mecChoices.getNextSetIndex(groupStart); choice < groupEnd; choice = mecChoices.getNextSetIndex(choice + 1)) {
bool insertedDiagElement = false;
for (auto const& entry : transitionMatrix.getRow(choice)) {
uint64_t column = toSubModelStateMapping[entry.getColumn()];
if (!insertedDiagElement && entry.getColumn() > mecState) {
mecTransitionBuilder.addNextValue(currRow, toSubModelStateMapping[mecState], selfLoopProb);
insertedDiagElement = true;
}
if (!insertedDiagElement && entry.getColumn() == mecState) {
mecTransitionBuilder.addNextValue(currRow, column, selfLoopProb + scalingFactor * entry.getValue());
insertedDiagElement = true;
} else {
mecTransitionBuilder.addNextValue(currRow, column, scalingFactor * entry.getValue());
}
}
if (!insertedDiagElement) {
mecTransitionBuilder.addNextValue(currRow, toSubModelStateMapping[mecState], selfLoopProb);
}
// Compute the rewards obtained for this choice
choiceRewards.push_back(scalingFactor * rewardModel.getTotalStateActionReward(mecState, choice, transitionMatrix));
++currRow;
}
}
auto mecTransitions = mecTransitionBuilder.build();
STORM_LOG_ASSERT(mecTransitions.isProbabilistic(), "The MEC-Matrix is not probabilistic.");
// start the iterations
ValueType precision = storm::utility::convertNumber<ValueType>(env.solver().lra().getPrecision()) / scalingFactor;
bool relative = env.solver().lra().getRelativeTerminationCriterion();
std::vector<ValueType> x(mecTransitions.getRowGroupCount(), storm::utility::zero<ValueType>());
std::vector<ValueType> xPrime = x;
auto multiplier = storm::solver::MultiplierFactory<ValueType>().create(env, mecTransitions);
ValueType maxDiff, minDiff;
uint64_t iter = 0;
boost::optional<uint64_t> maxIter;
if (env.solver().lra().isMaximalIterationCountSet()) {
maxIter = env.solver().lra().getMaximalIterationCount();
}
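// In every iteration, the difference x - xPrime approximates the reward gained per step. The iteration stops once
// the span maxDiff - minDiff drops below the precision (absolutely or relatively); the long-run average is then
// estimated as (maxDiff + minDiff) / 2 and divided by scalingFactor to undo the aperiodicity transformation.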
while (!maxIter.is_initialized() || iter < maxIter.get()) {
++iter;
// Compute the obtained rewards for the next step
multiplier->multiplyAndReduce(env, dir, x, &choiceRewards, x);
// Update xPrime and check for convergence.
// To avoid large (and numerically unstable) x-values, we subtract a reference value.
auto xIt = x.begin();
auto xPrimeIt = xPrime.begin();
ValueType refVal = *xIt;
maxDiff = *xIt - *xPrimeIt;
minDiff = maxDiff;
*xIt -= refVal;
*xPrimeIt = *xIt;
for (++xIt, ++xPrimeIt; xIt != x.end(); ++xIt, ++xPrimeIt) {
ValueType diff = *xIt - *xPrimeIt;
maxDiff = std::max(maxDiff, diff);
minDiff = std::min(minDiff, diff);
*xIt -= refVal;
*xPrimeIt = *xIt;
}
if ((maxDiff - minDiff) <= (relative ? (precision * minDiff) : precision)) {
break;
}
if (storm::utility::resources::isTerminate()) {
break;
}
}
if (maxIter.is_initialized() && iter == maxIter.get()) {
STORM_LOG_WARN("LRA computation did not converge within " << iter << " iterations.");
} else {
STORM_LOG_TRACE("LRA computation converged after " << iter << " iterations.");
}
if (scheduler) {
std::vector<uint_fast64_t> localMecChoices(mecTransitions.getRowGroupCount(), 0);
multiplier->multiplyAndReduce(env, dir, x, &choiceRewards, x, &localMecChoices);
auto localMecChoiceIt = localMecChoices.begin();
for (auto const& mecState : mecStates) {
// Get the choice index of the selected mec choice with respect to the global transition matrix.
uint_fast64_t globalChoice = mecChoices.getNextSetIndex(transitionMatrix.getRowGroupIndices()[mecState]);
for (uint_fast64_t i = 0; i < *localMecChoiceIt; ++i) {
globalChoice = mecChoices.getNextSetIndex(globalChoice + 1);
}
STORM_LOG_ASSERT(globalChoice < transitionMatrix.getRowGroupIndices()[mecState + 1], "Invalid global choice for mec state.");
scheduler->setChoice(globalChoice - transitionMatrix.getRowGroupIndices()[mecState], mecState);
++localMecChoiceIt;
}
}
return (maxDiff + minDiff) / (storm::utility::convertNumber<ValueType>(2.0) * scalingFactor);
}
template<typename ValueType>
template<typename RewardModelType>
ValueType SparseMdpPrctlHelper<ValueType>::computeLraForMaximalEndComponentLP(Environment const& env, OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec) {
std::shared_ptr<storm::solver::LpSolver<ValueType>> solver = storm::utility::solver::getLpSolver<ValueType>("LRA for MEC");
solver->setOptimizationDirection(invert(dir));
// First, we need to create the variables for the problem.
std::map<uint_fast64_t, storm::expressions::Variable> stateToVariableMap;
for (auto const& stateChoicesPair : mec) {
std::string variableName = "h" + std::to_string(stateChoicesPair.first);
stateToVariableMap[stateChoicesPair.first] = solver->addUnboundedContinuousVariable(variableName);
}
storm::expressions::Variable lambda = solver->addUnboundedContinuousVariable("L", 1);
solver->update();
// Now we encode the problem as constraints.
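// The encoded LP (shown for maximization; the inequalities are flipped when minimizing):
//   minimize lambda   s.t.   h_s >= r(s, a) - lambda + sum_{s'} P(s, a, s') * h_{s'}
// for every state s of the MEC and every choice a of s that belongs to the MEC.
// The optimal value of lambda is the LRA value of the MEC.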
for (auto const& stateChoicesPair : mec) {
uint_fast64_t state = stateChoicesPair.first;
// Create one constraint for every choice of the state.
for (auto choice : stateChoicesPair.second) {
storm::expressions::Expression constraint = -lambda;
for (auto element : transitionMatrix.getRow(choice)) {
constraint = constraint + stateToVariableMap.at(element.getColumn()) * solver->getConstant(element.getValue());
}
typename RewardModelType::ValueType r = rewardModel.getTotalStateActionReward(state, choice, transitionMatrix);
constraint = solver->getConstant(r) + constraint;
if (dir == OptimizationDirection::Minimize) {
constraint = stateToVariableMap.at(state) <= constraint;
} else {
constraint = stateToVariableMap.at(state) >= constraint;
}
solver->addConstraint("state" + std::to_string(state) + "," + std::to_string(choice), constraint);
}
}
solver->optimize();
return solver->getContinuousValue(lambda);
}
template<typename ValueType>
std::unique_ptr<CheckResult> SparseMdpPrctlHelper<ValueType>::computeConditionalProbabilities(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, storm::storage::BitVector const& targetStates, storm::storage::BitVector const& conditionStates) {
@@ -1818,10 +1359,6 @@ namespace storm {
template std::vector<double> SparseMdpPrctlHelper<double>::computeCumulativeRewards(Environment const& env, storm::solver::SolveGoal<double>&& goal, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::models::sparse::StandardRewardModel<double> const& rewardModel, uint_fast64_t stepBound);
template MDPSparseModelCheckingHelperReturnType<double> SparseMdpPrctlHelper<double>::computeReachabilityRewards(Environment const& env, storm::solver::SolveGoal<double>&& goal, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::storage::SparseMatrix<double> const& backwardTransitions, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, ModelCheckerHint const& hint);
template MDPSparseModelCheckingHelperReturnType<double> SparseMdpPrctlHelper<double>::computeTotalRewards(Environment const& env, storm::solver::SolveGoal<double>&& goal, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::storage::SparseMatrix<double> const& backwardTransitions, storm::models::sparse::StandardRewardModel<double> const& rewardModel, bool qualitative, bool produceScheduler, ModelCheckerHint const& hint);
template MDPSparseModelCheckingHelperReturnType<double> SparseMdpPrctlHelper<double>::computeLongRunAverageRewards(Environment const& env, storm::solver::SolveGoal<double>&& goal, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::storage::SparseMatrix<double> const& backwardTransitions, storm::models::sparse::StandardRewardModel<double> const& rewardModel, bool produceScheduler);
template double SparseMdpPrctlHelper<double>::computeLraForMaximalEndComponent(Environment const& env, OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::MaximalEndComponent const& mec, std::unique_ptr<storm::storage::Scheduler<double>>& scheduler);
template double SparseMdpPrctlHelper<double>::computeLraForMaximalEndComponentVI(Environment const& env, OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::MaximalEndComponent const& mec, std::unique_ptr<storm::storage::Scheduler<double>>& scheduler);
template double SparseMdpPrctlHelper<double>::computeLraForMaximalEndComponentLP(Environment const& env, OptimizationDirection dir, storm::storage::SparseMatrix<double> const& transitionMatrix, storm::models::sparse::StandardRewardModel<double> const& rewardModel, storm::storage::MaximalEndComponent const& mec);
#ifdef STORM_HAVE_CARL
template class SparseMdpPrctlHelper<storm::RationalNumber>;
@@ -1829,10 +1366,6 @@ namespace storm {
template std::vector<storm::RationalNumber> SparseMdpPrctlHelper<storm::RationalNumber>::computeCumulativeRewards(Environment const& env, storm::solver::SolveGoal<storm::RationalNumber>&& goal, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, uint_fast64_t stepBound);
template MDPSparseModelCheckingHelperReturnType<storm::RationalNumber> SparseMdpPrctlHelper<storm::RationalNumber>::computeReachabilityRewards(Environment const& env, storm::solver::SolveGoal<storm::RationalNumber>&& goal, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::storage::SparseMatrix<storm::RationalNumber> const& backwardTransitions, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, ModelCheckerHint const& hint);
template MDPSparseModelCheckingHelperReturnType<storm::RationalNumber> SparseMdpPrctlHelper<storm::RationalNumber>::computeTotalRewards(Environment const& env, storm::solver::SolveGoal<storm::RationalNumber>&& goal, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::storage::SparseMatrix<storm::RationalNumber> const& backwardTransitions, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, bool qualitative, bool produceScheduler, ModelCheckerHint const& hint);
template MDPSparseModelCheckingHelperReturnType<storm::RationalNumber> SparseMdpPrctlHelper<storm::RationalNumber>::computeLongRunAverageRewards(Environment const& env, storm::solver::SolveGoal<storm::RationalNumber>&& goal, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::storage::SparseMatrix<storm::RationalNumber> const& backwardTransitions, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, bool produceScheduler);
template storm::RationalNumber SparseMdpPrctlHelper<storm::RationalNumber>::computeLraForMaximalEndComponent(Environment const& env, OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::MaximalEndComponent const& mec, std::unique_ptr<storm::storage::Scheduler<storm::RationalNumber>>& scheduler);
template storm::RationalNumber SparseMdpPrctlHelper<storm::RationalNumber>::computeLraForMaximalEndComponentVI(Environment const& env, OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::MaximalEndComponent const& mec, std::unique_ptr<storm::storage::Scheduler<storm::RationalNumber>>& scheduler);
template storm::RationalNumber SparseMdpPrctlHelper<storm::RationalNumber>::computeLraForMaximalEndComponentLP(Environment const& env, OptimizationDirection dir, storm::storage::SparseMatrix<storm::RationalNumber> const& transitionMatrix, storm::models::sparse::StandardRewardModel<storm::RationalNumber> const& rewardModel, storm::storage::MaximalEndComponent const& mec);
#endif
}
}

src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h (13 changed lines)

@@ -67,24 +67,11 @@ namespace storm {
static std::vector<ValueType> computeReachabilityRewards(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, storm::models::sparse::StandardRewardModel<storm::Interval> const& intervalRewardModel, bool lowerBoundOfIntervals, storm::storage::BitVector const& targetStates, bool qualitative);
#endif
static MDPSparseModelCheckingHelperReturnType<ValueType> computeLongRunAverageProbabilities(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, storm::storage::BitVector const& psiStates, bool produceScheduler);
template<typename RewardModelType>
static MDPSparseModelCheckingHelperReturnType<ValueType> computeLongRunAverageRewards(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, RewardModelType const& rewardModel, bool produceScheduler);
static std::unique_ptr<CheckResult> computeConditionalProbabilities(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, storm::storage::BitVector const& targetStates, storm::storage::BitVector const& conditionStates);
private:
static MDPSparseModelCheckingHelperReturnType<ValueType> computeReachabilityRewardsHelper(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, std::function<std::vector<ValueType>(uint_fast64_t, storm::storage::SparseMatrix<ValueType> const&, storm::storage::BitVector const&)> const& totalStateRewardVectorGetter, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, std::function<storm::storage::BitVector()> const& zeroRewardStatesGetter, std::function<storm::storage::BitVector()> const& zeroRewardChoicesGetter, ModelCheckerHint const& hint = ModelCheckerHint());
template<typename RewardModelType>
static ValueType computeLraForMaximalEndComponent(Environment const& env, OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec, std::unique_ptr<storm::storage::Scheduler<ValueType>>& scheduler);
template<typename RewardModelType>
static ValueType computeLraForMaximalEndComponentVI(Environment const& env, OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec, std::unique_ptr<storm::storage::Scheduler<ValueType>>& scheduler);
template<typename RewardModelType>
static ValueType computeLraForMaximalEndComponentLP(Environment const& env, OptimizationDirection dir, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, storm::storage::MaximalEndComponent const& mec);
};
}
