From c2dd57cda57319a548c2ee1de3f50cc718a205fc Mon Sep 17 00:00:00 2001 From: TimQu Date: Fri, 22 Jun 2018 17:28:01 +0200 Subject: [PATCH] total rewards for mdps --- .../prctl/helper/SparseMdpPrctlHelper.cpp | 75 +++++++++++++++++++ .../prctl/helper/SparseMdpPrctlHelper.h | 3 + .../transformer/EndComponentEliminator.h | 34 +++++---- 3 files changed, 98 insertions(+), 14 deletions(-) diff --git a/src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.cpp b/src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.cpp index 8c13c73c4..e2ce02120 100644 --- a/src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.cpp +++ b/src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.cpp @@ -35,6 +35,8 @@ #include "storm/utility/ProgressMeasurement.h" #include "storm/utility/export.h" +#include "storm/transformer/EndComponentEliminator.h" + #include "storm/environment/solver/MinMaxSolverEnvironment.h" #include "storm/exceptions/InvalidStateException.h" @@ -839,6 +841,79 @@ namespace storm { return result; } + template + template + MDPSparseModelCheckingHelperReturnType SparseMdpPrctlHelper::computeTotalRewards(Environment const& env, storm::solver::SolveGoal&& goal, storm::storage::SparseMatrix const& transitionMatrix, storm::storage::SparseMatrix const& backwardTransitions, RewardModelType const& rewardModel, bool qualitative, bool produceScheduler, ModelCheckerHint const& hint) { + + // Reduce to reachability rewards + if (goal.minimize) { + STORM_LOG_ERROR_COND(!produceScheduler, "Can not produce scheduler for this property (functionality not implemented"); + // Identify the states from which no reward can be collected under some scheduler + storm::storage::BitVector choicesWithoutReward = rewardModel.getChoicesWithZeroReward(transitionMatrix); + storm::storage::BitVector statesWithZeroRewardChoice(transitionMatrix.getRowGroupCount(), false); + for (uint64_t state = 0; state < transitionMatrix.getRowGroupCount(); ++state) { + if (choicesWithoutReward.getNextSetIndex(transitionMatrix.getRowGroupIndices()[state])< transitionMatrix.getRowGroupIndices()[state + 1]) { + statesWithZeroRewardChoice.set(state); + } + } + storm::storage::BitVector rew0EStates = storm::utility::graph::performProbGreater0A(transitionMatrix, transitionMatrix.getRowGroupIndices(), backwardTransitions, statesWithZeroRewardChoice, ~statesWithZeroRewardChoice, false, 0, choicesWithoutReward); + rew0EStates.complement(); + return computeReachabilityRewards(env, std::move(goal), transitionMatrix, backwardTransitions, rewardModel, rew0EStates, qualitative, false, hint); + } else { + // Identify the states from which only states with zero reward are reachable. + storm::storage::BitVector statesWithoutReward = rewardModel.getStatesWithZeroReward(transitionMatrix); + storm::storage::BitVector rew0AStates = storm::utility::graph::performProbGreater0E(backwardTransitions, statesWithoutReward, ~statesWithoutReward); + rew0AStates.complement(); + + // There might be end components that consists only of states/choices with zero rewards. The reachability reward semantics would assign such + // end components reward infinity. To avoid this, we potentially need to eliminate such end components + storm::storage::BitVector trueStates(transitionMatrix.getRowGroupCount(), true); + if (storm::utility::graph::performProb1A(transitionMatrix, transitionMatrix.getRowGroupIndices(), backwardTransitions, trueStates, rew0AStates).full()) { + return computeReachabilityRewards(env, std::move(goal), transitionMatrix, backwardTransitions, rewardModel, rew0AStates, qualitative, produceScheduler, hint); + } else { + // The transformation of schedulers for the ec-eliminated system back to the original one is not implemented. + STORM_LOG_ERROR_COND(!produceScheduler, "Can not produce scheduler for this property (functionality not implemented"); + storm::storage::BitVector choicesWithoutReward = rewardModel.getChoicesWithZeroReward(transitionMatrix); + auto ecElimResult = storm::transformer::EndComponentEliminator::transform(transitionMatrix, storm::storage::BitVector(transitionMatrix.getRowGroupCount(), true), choicesWithoutReward, rew0AStates, true); + storm::storage::BitVector newRew0AStates(ecElimResult.matrix.getRowGroupCount(), false); + for (auto const& oldRew0AState : rew0AStates) { + newRew0AStates.set(ecElimResult.oldToNewStateMapping[oldRew0AState]); + } + + return computeReachabilityRewardsHelper(env, std::move(goal), ecElimResult.matrix, ecElimResult.matrix.transpose(true), + [&] (uint_fast64_t rowCount, storm::storage::SparseMatrix const& transitionMatrix, storm::storage::BitVector const& maybeStates) { + std::vector result; + std::vector oldChoiceRewards = rewardModel.getTotalRewardVector(rowCount, transitionMatrix); + result.reserve(rowCount); + for (uint64_t newState : maybeStates) { + for (uint64_t newChoice = transitionMatrix.getRowGroupIndices()[newState]; newChoice < transitionMatrix.getRowGroupIndices()[newState + 1]; ++newChoice) { + uint64_t oldChoice = ecElimResult.newToOldRowMapping[newChoice]; + result.push_back(oldChoiceRewards[oldChoice]); + } + } + STORM_LOG_ASSERT(result.size() == rowCount, "Unexpected size of reward vector."); + return result; + }, newRew0AStates, qualitative, false, + [&] () { + storm::storage::BitVector newStatesWithoutReward(ecElimResult.matrix.getRowGroupCount(), false); + for (auto const& oldStateWithoutRew : statesWithoutReward) { + newStatesWithoutReward.set(ecElimResult.oldToNewStateMapping[oldStateWithoutRew]); + } + return newStatesWithoutReward; + }, + [&] () { + storm::storage::BitVector newChoicesWithoutReward(ecElimResult.matrix.getRowGroupCount(), false); + for (uint64_t newChoice = 0; newChoice < ecElimResult.matrix.getRowCount(); ++newChoice) { + if (choicesWithoutReward.get(ecElimResult.newToOldChoiceMapping[newChoice])) { + newChoicesWithoutReward.set(newChoice); + } + } + return newChoicesWithoutReward; + }); + } + } + } + template template MDPSparseModelCheckingHelperReturnType SparseMdpPrctlHelper::computeReachabilityRewards(Environment const& env, storm::solver::SolveGoal&& goal, storm::storage::SparseMatrix const& transitionMatrix, storm::storage::SparseMatrix const& backwardTransitions, RewardModelType const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, ModelCheckerHint const& hint) { diff --git a/src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h b/src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h index 920bb4d47..b1de7a442 100644 --- a/src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h +++ b/src/storm/modelchecker/prctl/helper/SparseMdpPrctlHelper.h @@ -55,6 +55,9 @@ namespace storm { template static std::vector computeCumulativeRewards(Environment const& env, storm::solver::SolveGoal&& goal, storm::storage::SparseMatrix const& transitionMatrix, RewardModelType const& rewardModel, uint_fast64_t stepBound); + template + static MDPSparseModelCheckingHelperReturnType computeTotalRewards(Environment const& env, storm::solver::SolveGoal&& goal, storm::storage::SparseMatrix const& transitionMatrix, storm::storage::SparseMatrix const& backwardTransitions, RewardModelType const& rewardModel, bool qualitative, bool produceScheduler, ModelCheckerHint const& hint = ModelCheckerHint()); + template static MDPSparseModelCheckingHelperReturnType computeReachabilityRewards(Environment const& env, storm::solver::SolveGoal&& goal, storm::storage::SparseMatrix const& transitionMatrix, storm::storage::SparseMatrix const& backwardTransitions, RewardModelType const& rewardModel, storm::storage::BitVector const& targetStates, bool qualitative, bool produceScheduler, ModelCheckerHint const& hint = ModelCheckerHint()); diff --git a/src/storm/transformer/EndComponentEliminator.h b/src/storm/transformer/EndComponentEliminator.h index 635cba7b6..378edc86a 100644 --- a/src/storm/transformer/EndComponentEliminator.h +++ b/src/storm/transformer/EndComponentEliminator.h @@ -18,7 +18,7 @@ namespace storm { // The resulting matrix storm::storage::SparseMatrix matrix; // Index mapping that gives for each row of the resulting matrix the corresponding row in the original matrix. - // For the empty rows added to EC states, an arbitrary row of the original matrix that stays inside the EC is given. + // For the sink rows added to EC states, an arbitrary row of the original matrix that stays inside the EC is given. std::vector newToOldRowMapping; // Gives for each state (=rowGroup) of the original matrix the corresponding state in the resulting matrix. // States of a removed ECs are mapped to the state that substitutes the EC. @@ -38,9 +38,10 @@ namespace storm { * * For each such EC (that is not contained in another EC), we add a new state and redirect all incoming and outgoing * transitions of the EC to (and from) this state. - * If addEmptyRowStates is true for at least one state of an eliminated EC, an empty row is added to the new state (representing the choice to stay at the EC forever). + * If addSinkRowStates is true for at least one state of an eliminated EC, a row is added to the new state (representing the choice to stay at the EC forever). + * If addSelfLoopAtSinkStates is true, such rows get a selfloop (with value 1). Otherwise, the row remains empty. */ - static EndComponentEliminatorReturnType transform(storm::storage::SparseMatrix const& originalMatrix, storm::storage::BitVector const& subsystemStates, storm::storage::BitVector const& possibleECRows, storm::storage::BitVector const& addEmptyRowStates) { + static EndComponentEliminatorReturnType transform(storm::storage::SparseMatrix const& originalMatrix, storm::storage::BitVector const& subsystemStates, storm::storage::BitVector const& possibleECRows, storm::storage::BitVector const& addSinkRowStates, bool addSelfLoopAtSinkStates = false) { STORM_LOG_DEBUG("Invoked EndComponentEliminator on matrix with " << originalMatrix.getRowGroupCount() << " row groups."); storm::storage::MaximalEndComponentDecomposition ecs = computeECs(originalMatrix, possibleECRows, subsystemStates); @@ -57,7 +58,7 @@ namespace storm { EndComponentEliminatorReturnType result; std::vector newRowGroupIndices; result.oldToNewStateMapping = std::vector(originalMatrix.getRowGroupCount(), std::numeric_limits::max()); - storm::storage::BitVector emptyRows(originalMatrix.getRowCount(), false); // will be resized as soon as the rowCount of the resulting matrix is known + storm::storage::BitVector sinkRows(originalMatrix.getRowCount(), false); // will be resized as soon as the rowCount of the resulting matrix is known for(auto keptState : keptStates) { result.oldToNewStateMapping[keptState] = newRowGroupIndices.size(); // i.e., the current number of processed states @@ -68,7 +69,7 @@ namespace storm { } for (auto const& ec : ecs) { newRowGroupIndices.push_back(result.newToOldRowMapping.size()); - bool ecGetsEmptyRow = false; + bool ecGetsSinkRow = false; for (auto const& stateActionsPair : ec) { result.oldToNewStateMapping[stateActionsPair.first] = newRowGroupIndices.size()-1; for(uint_fast64_t row = originalMatrix.getRowGroupIndices()[stateActionsPair.first]; row < originalMatrix.getRowGroupIndices()[stateActionsPair.first +1]; ++row) { @@ -76,18 +77,18 @@ namespace storm { result.newToOldRowMapping.push_back(row); } } - ecGetsEmptyRow |= addEmptyRowStates.get(stateActionsPair.first); + ecGetsSinkRow |= addSinkRowStates.get(stateActionsPair.first); } - if(ecGetsEmptyRow) { + if(ecGetsSinkRow) { STORM_LOG_ASSERT(result.newToOldRowMapping.size() < originalMatrix.getRowCount(), "Didn't expect to see more rows in the reduced matrix than in the original one."); - emptyRows.set(result.newToOldRowMapping.size(), true); + sinkRows.set(result.newToOldRowMapping.size(), true); result.newToOldRowMapping.push_back(*ec.begin()->second.begin()); } } newRowGroupIndices.push_back(result.newToOldRowMapping.size()); - emptyRows.resize(result.newToOldRowMapping.size()); + sinkRows.resize(result.newToOldRowMapping.size()); - result.matrix = buildTransformedMatrix(originalMatrix, newRowGroupIndices, result.newToOldRowMapping, result.oldToNewStateMapping, emptyRows); + result.matrix = buildTransformedMatrix(originalMatrix, newRowGroupIndices, result.newToOldRowMapping, result.oldToNewStateMapping, sinkRows, addSelfLoopAtSinkStates); STORM_LOG_DEBUG("EndComponentEliminator is done. Resulting matrix has " << result.matrix.getRowGroupCount() << " row groups."); return result; } @@ -136,15 +137,20 @@ namespace storm { std::vector const& newRowGroupIndices, std::vector const& newToOldRowMapping, std::vector const& oldToNewStateMapping, - storm::storage::BitVector const& emptyRows) { + storm::storage::BitVector const& sinkRows, + bool addSelfLoopAtSinkStates) { uint_fast64_t numRowGroups = newRowGroupIndices.size()-1; uint_fast64_t newRow = 0; storm::storage::SparseMatrixBuilder builder(newToOldRowMapping.size(), numRowGroups, originalMatrix.getEntryCount(), false, true, numRowGroups); - for(uint_fast64_t newRowGroup = 0; newRowGroup < numRowGroups; ++newRowGroup) { + for (uint_fast64_t newRowGroup = 0; newRowGroup < numRowGroups; ++newRowGroup) { builder.newRowGroup(newRow); - for(; newRow < newRowGroupIndices[newRowGroup+1]; ++newRow) { - if(!emptyRows.get(newRow)) { + for (; newRow < newRowGroupIndices[newRowGroup + 1]; ++newRow) { + if (sinkRows.get(newRow)) { + if (addSelfLoopAtSinkStates) { + builder.addNextValue(newRow, newRowGroup, storm::utility::one()); + } + } else { // Make sure that the entries for this row are inserted in the right order. // Also, transitions to the same EC need to be merged and transitions to states that are erased need to be ignored std::map sortedEntries;