Browse Source

total rewards for mdps

tempestpy_adaptions
TimQu 7 years ago
parent
commit
c2dd57cda5
  1. 75
      src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.cpp
  2. 3
      src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h
  3. 34
      src/storm/transformer/EndComponentEliminator.h

75
src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.cpp

@ -35,6 +35,8 @@
#include "storm/utility/ProgressMeasurement.h"
#include "storm/utility/export.h"
#include "storm/transformer/EndComponentEliminator.h"
#include "storm/environment/solver/MinMaxSolverEnvironment.h"
#include "storm/exceptions/InvalidStateException.h"
@ -839,6 +841,79 @@ namespace storm {
return result;
}
template<typename ValueType>
template<typename RewardModelType>
MDPSparseModelCheckingHelperReturnType<ValueType> SparseMdpPrctlHelper<ValueType>::computeTotalRewards(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, RewardModelType const& rewardModel, bool qualitative, bool produceScheduler, ModelCheckerHint const& hint) {
// Reduce to reachability rewards
if (goal.minimize) {
STORM_LOG_ERROR_COND(!produceScheduler, "Can not produce scheduler for this property (functionality not implemented");
// Identify the states from which no reward can be collected under some scheduler
storm::storage::BitVector choicesWithoutReward = rewardModel.getChoicesWithZeroReward(transitionMatrix);
storm::storage::BitVector statesWithZeroRewardChoice(transitionMatrix.getRowGroupCount(), false);
for (uint64_t state = 0; state < transitionMatrix.getRowGroupCount(); ++state) {
if (choicesWithoutReward.getNextSetIndex(transitionMatrix.getRowGroupIndices()[state])< transitionMatrix.getRowGroupIndices()[state + 1]) {
statesWithZeroRewardChoice.set(state);
}
}
storm::storage::BitVector rew0EStates = storm::utility::graph::performProbGreater0A(transitionMatrix, transitionMatrix.getRowGroupIndices(), backwardTransitions, statesWithZeroRewardChoice, ~statesWithZeroRewardChoice, false, 0, choicesWithoutReward);
rew0EStates.complement();
return computeReachabilityRewards(env, std::move(goal), transitionMatrix, backwardTransitions, rewardModel, rew0EStates, qualitative, false, hint);
} else {
// Identify the states from which only states with zero reward are reachable.
storm::storage::BitVector statesWithoutReward = rewardModel.getStatesWithZeroReward(transitionMatrix);
storm::storage::BitVector rew0AStates = storm::utility::graph::performProbGreater0E(backwardTransitions, statesWithoutReward, ~statesWithoutReward);
rew0AStates.complement();
// There might be end components that consists only of states/choices with zero rewards. The reachability reward semantics would assign such
// end components reward infinity. To avoid this, we potentially need to eliminate such end components
storm::storage::BitVector trueStates(transitionMatrix.getRowGroupCount(), true);
if (storm::utility::graph::performProb1A(transitionMatrix, transitionMatrix.getRowGroupIndices(), backwardTransitions, trueStates, rew0AStates).full()) {
return computeReachabilityRewards(env, std::move(goal), transitionMatrix, backwardTransitions, rewardModel, rew0AStates, qualitative, produceScheduler, hint);
} else {
// The transformation of schedulers for the ec-eliminated system back to the original one is not implemented.
STORM_LOG_ERROR_COND(!produceScheduler, "Can not produce scheduler for this property (functionality not implemented");
storm::storage::BitVector choicesWithoutReward = rewardModel.getChoicesWithZeroReward(transitionMatrix);
auto ecElimResult = storm::transformer::EndComponentEliminator<ValueType>::transform(transitionMatrix, storm::storage::BitVector(transitionMatrix.getRowGroupCount(), true), choicesWithoutReward, rew0AStates, true);
storm::storage::BitVector newRew0AStates(ecElimResult.matrix.getRowGroupCount(), false);
for (auto const& oldRew0AState : rew0AStates) {
newRew0AStates.set(ecElimResult.oldToNewStateMapping[oldRew0AState]);
}
return computeReachabilityRewardsHelper(env, std::move(goal), ecElimResult.matrix, ecElimResult.matrix.transpose(true),
[&] (uint_fast64_t rowCount, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::BitVector const& maybeStates) {
std::vector<ValueType> result;
std::vector<ValueType> oldChoiceRewards = rewardModel.getTotalRewardVector(rowCount, transitionMatrix);
result.reserve(rowCount);
for (uint64_t newState : maybeStates) {
for (uint64_t newChoice = transitionMatrix.getRowGroupIndices()[newState]; newChoice < transitionMatrix.getRowGroupIndices()[newState + 1]; ++newChoice) {
uint64_t oldChoice = ecElimResult.newToOldRowMapping[newChoice];
result.push_back(oldChoiceRewards[oldChoice]);
}
}
STORM_LOG_ASSERT(result.size() == rowCount, "Unexpected size of reward vector.");
return result;
}, newRew0AStates, qualitative, false,
[&] () {
storm::storage::BitVector newStatesWithoutReward(ecElimResult.matrix.getRowGroupCount(), false);
for (auto const& oldStateWithoutRew : statesWithoutReward) {
newStatesWithoutReward.set(ecElimResult.oldToNewStateMapping[oldStateWithoutRew]);
}
return newStatesWithoutReward;
},
[&] () {
storm::storage::BitVector newChoicesWithoutReward(ecElimResult.matrix.getRowGroupCount(), false);
for (uint64_t newChoice = 0; newChoice < ecElimResult.matrix.getRowCount(); ++newChoice) {
if (choicesWithoutReward.get(ecElimResult.newToOldChoiceMapping[newChoice])) {
newChoicesWithoutReward.set(newChoice);
}
}
return newChoicesWithoutReward;
});
}
}
}
template<typename ValueType>
template<typename RewardModelType>
MDPSparseModelCheckingHelperReturnType<ValueType> SparseMdpPrctlHelper<ValueType>::computeReachabilityRewards(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, RewardModelType const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, ModelCheckerHint const& hint) {

3
src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h

@ -55,6 +55,9 @@ namespace storm {
template<typename RewardModelType>
static std::vector<ValueType> computeCumulativeRewards(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, RewardModelType const& rewardModel, uint_fast64_t stepBound);
template<typename RewardModelType>
static MDPSparseModelCheckingHelperReturnType<ValueType> computeTotalRewards(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, RewardModelType const& rewardModel, bool qualitative, bool produceScheduler, ModelCheckerHint const& hint = ModelCheckerHint());
template<typename RewardModelType>
static MDPSparseModelCheckingHelperReturnType<ValueType> computeReachabilityRewards(Environment const& env, storm::solver::SolveGoal<ValueType>&& goal, storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, RewardModelType const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, ModelCheckerHint const& hint = ModelCheckerHint());

34
src/storm/transformer/EndComponentEliminator.h

@ -18,7 +18,7 @@ namespace storm {
// The resulting matrix
storm::storage::SparseMatrix<ValueType> matrix;
// Index mapping that gives for each row of the resulting matrix the corresponding row in the original matrix.
// For the empty rows added to EC states, an arbitrary row of the original matrix that stays inside the EC is given.
// For the sink rows added to EC states, an arbitrary row of the original matrix that stays inside the EC is given.
std::vector<uint_fast64_t> newToOldRowMapping;
// Gives for each state (=rowGroup) of the original matrix the corresponding state in the resulting matrix.
// States of a removed ECs are mapped to the state that substitutes the EC.
@ -38,9 +38,10 @@ namespace storm {
*
* For each such EC (that is not contained in another EC), we add a new state and redirect all incoming and outgoing
* transitions of the EC to (and from) this state.
* If addEmptyRowStates is true for at least one state of an eliminated EC, an empty row is added to the new state (representing the choice to stay at the EC forever).
* If addSinkRowStates is true for at least one state of an eliminated EC, a row is added to the new state (representing the choice to stay at the EC forever).
* If addSelfLoopAtSinkStates is true, such rows get a selfloop (with value 1). Otherwise, the row remains empty.
*/
static EndComponentEliminatorReturnType transform(storm::storage::SparseMatrix<ValueType> const& originalMatrix, storm::storage::BitVector const& subsystemStates, storm::storage::BitVector const& possibleECRows, storm::storage::BitVector const& addEmptyRowStates) {
static EndComponentEliminatorReturnType transform(storm::storage::SparseMatrix<ValueType> const& originalMatrix, storm::storage::BitVector const& subsystemStates, storm::storage::BitVector const& possibleECRows, storm::storage::BitVector const& addSinkRowStates, bool addSelfLoopAtSinkStates = false) {
STORM_LOG_DEBUG("Invoked EndComponentEliminator on matrix with " << originalMatrix.getRowGroupCount() << " row groups.");
storm::storage::MaximalEndComponentDecomposition<ValueType> ecs = computeECs(originalMatrix, possibleECRows, subsystemStates);
@ -57,7 +58,7 @@ namespace storm {
EndComponentEliminatorReturnType result;
std::vector<uint_fast64_t> newRowGroupIndices;
result.oldToNewStateMapping = std::vector<uint_fast64_t>(originalMatrix.getRowGroupCount(), std::numeric_limits<uint_fast64_t>::max());
storm::storage::BitVector emptyRows(originalMatrix.getRowCount(), false); // will be resized as soon as the rowCount of the resulting matrix is known
storm::storage::BitVector sinkRows(originalMatrix.getRowCount(), false); // will be resized as soon as the rowCount of the resulting matrix is known
for(auto keptState : keptStates) {
result.oldToNewStateMapping[keptState] = newRowGroupIndices.size(); // i.e., the current number of processed states
@ -68,7 +69,7 @@ namespace storm {
}
for (auto const& ec : ecs) {
newRowGroupIndices.push_back(result.newToOldRowMapping.size());
bool ecGetsEmptyRow = false;
bool ecGetsSinkRow = false;
for (auto const& stateActionsPair : ec) {
result.oldToNewStateMapping[stateActionsPair.first] = newRowGroupIndices.size()-1;
for(uint_fast64_t row = originalMatrix.getRowGroupIndices()[stateActionsPair.first]; row < originalMatrix.getRowGroupIndices()[stateActionsPair.first +1]; ++row) {
@ -76,18 +77,18 @@ namespace storm {
result.newToOldRowMapping.push_back(row);
}
}
ecGetsEmptyRow |= addEmptyRowStates.get(stateActionsPair.first);
ecGetsSinkRow |= addSinkRowStates.get(stateActionsPair.first);
}
if(ecGetsEmptyRow) {
if(ecGetsSinkRow) {
STORM_LOG_ASSERT(result.newToOldRowMapping.size() < originalMatrix.getRowCount(), "Didn't expect to see more rows in the reduced matrix than in the original one.");
emptyRows.set(result.newToOldRowMapping.size(), true);
sinkRows.set(result.newToOldRowMapping.size(), true);
result.newToOldRowMapping.push_back(*ec.begin()->second.begin());
}
}
newRowGroupIndices.push_back(result.newToOldRowMapping.size());
emptyRows.resize(result.newToOldRowMapping.size());
sinkRows.resize(result.newToOldRowMapping.size());
result.matrix = buildTransformedMatrix(originalMatrix, newRowGroupIndices, result.newToOldRowMapping, result.oldToNewStateMapping, emptyRows);
result.matrix = buildTransformedMatrix(originalMatrix, newRowGroupIndices, result.newToOldRowMapping, result.oldToNewStateMapping, sinkRows, addSelfLoopAtSinkStates);
STORM_LOG_DEBUG("EndComponentEliminator is done. Resulting matrix has " << result.matrix.getRowGroupCount() << " row groups.");
return result;
}
@ -136,15 +137,20 @@ namespace storm {
std::vector<uint_fast64_t> const& newRowGroupIndices,
std::vector<uint_fast64_t> const& newToOldRowMapping,
std::vector<uint_fast64_t> const& oldToNewStateMapping,
storm::storage::BitVector const& emptyRows) {
storm::storage::BitVector const& sinkRows,
bool addSelfLoopAtSinkStates) {
uint_fast64_t numRowGroups = newRowGroupIndices.size()-1;
uint_fast64_t newRow = 0;
storm::storage::SparseMatrixBuilder<ValueType> builder(newToOldRowMapping.size(), numRowGroups, originalMatrix.getEntryCount(), false, true, numRowGroups);
for(uint_fast64_t newRowGroup = 0; newRowGroup < numRowGroups; ++newRowGroup) {
for (uint_fast64_t newRowGroup = 0; newRowGroup < numRowGroups; ++newRowGroup) {
builder.newRowGroup(newRow);
for(; newRow < newRowGroupIndices[newRowGroup+1]; ++newRow) {
if(!emptyRows.get(newRow)) {
for (; newRow < newRowGroupIndices[newRowGroup + 1]; ++newRow) {
if (sinkRows.get(newRow)) {
if (addSelfLoopAtSinkStates) {
builder.addNextValue(newRow, newRowGroup, storm::utility::one<ValueType>());
}
} else {
// Make sure that the entries for this row are inserted in the right order.
// Also, transitions to the same EC need to be merged and transitions to states that are erased need to be ignored
std::map<uint_fast64_t, ValueType> sortedEntries;

Loading…
Cancel
Save