
WeightVectorChecker: Making initialization LRA-ready

Branch: tempestpy_adaptions
Author: Tim Quatmann, 4 years ago
Commit: 29a7a7e865

Changed files:
  1. src/storm/modelchecker/multiobjective/pcaa/StandardMaPcaaWeightVectorChecker.cpp (8 changed lines)
  2. src/storm/modelchecker/multiobjective/pcaa/StandardMdpPcaaWeightVectorChecker.cpp (2 changed lines)
  3. src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp (15 changed lines)
  4. src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h (11 changed lines)

src/storm/modelchecker/multiobjective/pcaa/StandardMaPcaaWeightVectorChecker.cpp (8 changed lines)

@@ -35,7 +35,8 @@ namespace storm {
 exitRates = model.getExitRates();
 // Set the (discretized) state action rewards.
-this->actionRewards.resize(this->objectives.size());
+this->actionRewards.assign(this->objectives.size(), {});
+this->stateRewards.assign(this->objectives.size(), {});
 for (uint64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
     auto const& formula = *this->objectives[objIndex].formula;
     STORM_LOG_THROW(formula.isRewardOperatorFormula() && formula.asRewardOperatorFormula().hasRewardModelName(), storm::exceptions::UnexpectedException, "Unexpected type of operator formula: " << formula);
@@ -49,6 +50,11 @@ namespace storm {
             this->actionRewards[objIndex][model.getTransitionMatrix().getRowGroupIndices()[markovianState]] += rewModel.getStateReward(markovianState) / exitRates[markovianState];
         }
     }
+} else if (formula.getSubformula().isLongRunAverageRewardFormula()) {
+    // The LRA methods for MA require keeping track of state- and action rewards separately
+    if (rewModel.hasStateRewards()) {
+        this->stateRewards[objIndex] = rewModel.getStateRewardVector();
+    }
 } else {
     STORM_LOG_THROW(formula.getSubformula().isCumulativeRewardFormula() && formula.getSubformula().asCumulativeRewardFormula().getTimeBoundReference().isTimeBound(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
     STORM_LOG_THROW(!rewModel.hasStateRewards(), storm::exceptions::InvalidPropertyException, "Found state rewards for time bounded objective " << this->objectives[objIndex].originalFormula << ". This is not supported.");
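
The hunk above introduces the separation the commit message refers to: for total- and cumulative-reward objectives, the reward of a Markovian state is discretized into the reward of the state's first action row (division by the exit rate, as in the unchanged line above), while for long-run average objectives the state rewards are now kept in a separate per-objective vector. The following minimal sketch illustrates that split; it is not the actual Storm code, and the parameter names (isLraObjective, markovianStates, rowGroupIndices, stateRewardVector) are illustrative stand-ins for the members and reward-model accessors used in the hunk.

#include <cstdint>
#include <vector>

// Illustrative sketch only; plain vectors stand in for Storm's reward models and sparse matrix.
void initializeObjectiveRewards(std::vector<bool> const& isLraObjective,          // one flag per objective (assumed)
                                std::vector<double> const& stateRewardVector,     // state reward per state
                                std::vector<double> const& exitRates,             // exit rate per Markovian state
                                std::vector<uint64_t> const& markovianStates,     // indices of the Markovian states
                                std::vector<uint64_t> const& rowGroupIndices,     // first action (row) of each state
                                std::vector<std::vector<double>>& actionRewards,  // per objective: reward per action
                                std::vector<std::vector<double>>& stateRewards) { // per objective: reward per state
    for (uint64_t objIndex = 0; objIndex < isLraObjective.size(); ++objIndex) {
        if (isLraObjective[objIndex]) {
            // The LRA methods need the original (rate-based) state rewards, so they are stored
            // separately instead of being merged into the action rewards.
            stateRewards[objIndex] = stateRewardVector;
        } else {
            // Total/cumulative reward objectives: fold the state reward of each Markovian state
            // into the reward of its first action row, discretized by the exit rate (as above).
            for (uint64_t s : markovianStates) {
                actionRewards[objIndex][rowGroupIndices[s]] += stateRewardVector[s] / exitRates[s];
            }
        }
    }
}

For purely probabilistic models such as MDPs there is no exit rate to divide by, so the separate state-reward vectors simply stay empty there; the comment added to the header (last file below) documents exactly this.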

src/storm/modelchecker/multiobjective/pcaa/StandardMdpPcaaWeightVectorChecker.cpp (2 changed lines)

@@ -37,7 +37,7 @@ namespace storm {
     auto const& cumulativeRewardFormula = formula.getSubformula().asCumulativeRewardFormula();
     STORM_LOG_THROW(!cumulativeRewardFormula.isMultiDimensional() && !cumulativeRewardFormula.getTimeBoundReference().isRewardBound(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
 } else {
-    STORM_LOG_THROW(formula.getSubformula().isTotalRewardFormula(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
+    STORM_LOG_THROW(formula.getSubformula().isTotalRewardFormula() || formula.getSubformula().isLongRunAverageRewardFormula(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
 }
 typename SparseMdpModelType::RewardModelType const& rewModel = model.getRewardModel(formula.asRewardOperatorFormula().getRewardModelName());
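
Functionally, the change above only widens the acceptance check: the MDP checker's initialization now admits long-run average reward subformulas alongside total reward ones, while single-dimensional, time-bounded cumulative rewards are still handled in the first branch. A small sketch of that check, with the storm::logic type queries abstracted into plain flags (SubformulaTraits is an illustrative stand-in, not a Storm type):

// Minimal sketch, assuming boolean traits analogous to the formula type queries used above.
struct SubformulaTraits {
    bool isCumulativeReward = false;
    bool isMultiDimensional = false;
    bool hasRewardBound = false;
    bool isTotalReward = false;
    bool isLongRunAverage = false;
};

// Returns true iff the (sketched) initialization accepts the given subformula.
bool isSupportedSubformula(SubformulaTraits const& f) {
    if (f.isCumulativeReward) {
        // Only single-dimensional cumulative rewards with a time (not reward) bound are expected.
        return !f.isMultiDimensional && !f.hasRewardBound;
    }
    // After this change, long-run average objectives are accepted next to total reward objectives.
    return f.isTotalReward || f.isLongRunAverage;
}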

src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.cpp (15 changed lines)

@@ -40,19 +40,19 @@ namespace storm {
 auto rewardAnalysis = preprocessing::SparseMultiObjectiveRewardAnalysis<SparseModelType>::analyze(preprocessorResult);
 STORM_LOG_THROW(rewardAnalysis.rewardFinitenessType != preprocessing::RewardFinitenessType::Infinite, storm::exceptions::NotSupportedException, "There is no Pareto optimal scheduler that yields finite reward for all objectives. This is not supported.");
 STORM_LOG_THROW(rewardAnalysis.totalRewardLessInfinityEStates, storm::exceptions::UnexpectedException, "The set of states with reward < infinity for some scheduler has not been computed during preprocessing.");
-STORM_LOG_THROW(preprocessorResult.containsOnlyTrivialObjectives(), storm::exceptions::NotSupportedException, "At least one objective was not reduced to an expected (total or cumulative) reward objective during preprocessing. This is not supported by the considered weight vector checker.");
+STORM_LOG_THROW(preprocessorResult.containsOnlyTrivialObjectives(), storm::exceptions::NotSupportedException, "At least one objective was not reduced to an expected (long run, total or cumulative) reward objective during preprocessing. This is not supported by the considered weight vector checker.");
 STORM_LOG_THROW(preprocessorResult.preprocessedModel->getInitialStates().getNumberOfSetBits() == 1, storm::exceptions::NotSupportedException, "The model has multiple initial states.");

 // Build a subsystem of the preprocessor result model that discards states that yield infinite reward for all schedulers.
 // We can also merge the states that will have reward zero anyway.
 storm::storage::BitVector maybeStates = rewardAnalysis.totalRewardLessInfinityEStates.get() & ~rewardAnalysis.reward0AStates;
-storm::storage::BitVector finiteRewardChoices = preprocessorResult.preprocessedModel->getTransitionMatrix().getRowFilter(rewardAnalysis.totalRewardLessInfinityEStates.get(), rewardAnalysis.totalRewardLessInfinityEStates.get());
+storm::storage::BitVector finiteTotalRewardChoices = preprocessorResult.preprocessedModel->getTransitionMatrix().getRowFilter(rewardAnalysis.totalRewardLessInfinityEStates.get(), rewardAnalysis.totalRewardLessInfinityEStates.get());
 std::set<std::string> relevantRewardModels;
 for (auto const& obj : this->objectives) {
     obj.formula->gatherReferencedRewardModels(relevantRewardModels);
 }
 storm::transformer::GoalStateMerger<SparseModelType> merger(*preprocessorResult.preprocessedModel);
-auto mergerResult = merger.mergeTargetAndSinkStates(maybeStates, rewardAnalysis.reward0AStates, storm::storage::BitVector(maybeStates.size(), false), std::vector<std::string>(relevantRewardModels.begin(), relevantRewardModels.end()), finiteRewardChoices);
+auto mergerResult = merger.mergeTargetAndSinkStates(maybeStates, rewardAnalysis.reward0AStates, storm::storage::BitVector(maybeStates.size(), false), std::vector<std::string>(relevantRewardModels.begin(), relevantRewardModels.end()), finiteTotalRewardChoices);

 // Initialize data specific for the considered model type
 initializeModelTypeSpecificData(*mergerResult.model);
@@ -60,10 +60,10 @@ namespace storm {
 // Initialize general data of the model
 transitionMatrix = std::move(mergerResult.model->getTransitionMatrix());
 initialState = *mergerResult.model->getInitialStates().begin();
-reward0EStates = rewardAnalysis.totalReward0EStates % maybeStates;
+totalReward0EStates = rewardAnalysis.totalReward0EStates % maybeStates;
 if (mergerResult.targetState) {
     // There is an additional state in the result
-    reward0EStates.resize(reward0EStates.size() + 1, true);
+    totalReward0EStates.resize(totalReward0EStates.size() + 1, true);
     // The overapproximation for the possible ec choices consists of the states that can reach the target states with prob. 0 and the target state itself.
     storm::storage::BitVector targetStateAsVector(transitionMatrix.getRowGroupCount(), false);
@@ -75,6 +75,7 @@ namespace storm {
 }

 // set data for unbounded objectives
+lraObjectives = storm::storage::BitVector(this->objectives.size(), false);
 objectivesWithNoUpperTimeBound = storm::storage::BitVector(this->objectives.size(), false);
 actionsWithoutRewardInUnboundedPhase = storm::storage::BitVector(transitionMatrix.getRowCount(), true);
 for (uint_fast64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
@@ -83,6 +84,10 @@ namespace storm {
         objectivesWithNoUpperTimeBound.set(objIndex, true);
         actionsWithoutRewardInUnboundedPhase &= storm::utility::vector::filterZero(actionRewards[objIndex]);
     }
+    if (formula.getSubformula().isLongRunAverageRewardFormula()) {
+        lraObjectives.set(objIndex, true);
+        objectivesWithNoUpperTimeBound.set(objIndex, true);
+    }
 }

 // initialize data for the results
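
The two hunks above classify the objectives during initialization: the new lraObjectives bit vector marks long-run average objectives, every such objective is also treated as having no upper time bound, and an action keeps its "no reward in the unbounded phase" flag only if its reward is zero for every unbounded objective. A minimal sketch of this classification follows, using std::vector<bool> in place of storm::storage::BitVector; the guard of the first branch is not visible in the hunk, so the per-objective flags below are assumptions.

#include <cstddef>
#include <cstdint>
#include <vector>

struct ObjectiveKind {          // assumed, simplified view of a preprocessed objective
    bool isTotalReward;         // expected total reward objective
    bool isLongRunAverage;      // expected long-run average reward objective
};

void classifyObjectives(std::vector<ObjectiveKind> const& objectives,
                        std::vector<std::vector<double>> const& actionRewards,  // per objective, per action
                        std::vector<bool>& lraObjectives,
                        std::vector<bool>& objectivesWithNoUpperTimeBound,
                        std::vector<bool>& actionsWithoutRewardInUnboundedPhase) {
    std::size_t const numActions = actionRewards.empty() ? 0 : actionRewards.front().size();
    lraObjectives.assign(objectives.size(), false);
    objectivesWithNoUpperTimeBound.assign(objectives.size(), false);
    actionsWithoutRewardInUnboundedPhase.assign(numActions, true);
    for (std::size_t objIndex = 0; objIndex < objectives.size(); ++objIndex) {
        if (objectives[objIndex].isTotalReward) {
            objectivesWithNoUpperTimeBound[objIndex] = true;
            // Mirrors the filterZero step above: an action stays flagged only if it has
            // zero reward for every objective handled in the unbounded phase.
            for (std::size_t action = 0; action < numActions; ++action) {
                if (actionRewards[objIndex][action] != 0.0) {
                    actionsWithoutRewardInUnboundedPhase[action] = false;
                }
            }
        }
        if (objectives[objIndex].isLongRunAverage) {
            lraObjectives[objIndex] = true;
            objectivesWithNoUpperTimeBound[objIndex] = true;
        }
    }
}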

src/storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h (11 changed lines)

@@ -117,11 +117,16 @@ namespace storm {
 storm::storage::BitVector ecChoicesHint;
 // The actions that have reward assigned for at least one objective without upper timeBound
 storm::storage::BitVector actionsWithoutRewardInUnboundedPhase;
-// The states for which there is a scheduler yielding reward 0 for each objective
-storm::storage::BitVector reward0EStates;
+// The states for which there is a scheduler yielding reward 0 for each total reward objective
+storm::storage::BitVector totalReward0EStates;
 // stores the state action rewards for each objective.
 std::vector<std::vector<ValueType>> actionRewards;
+// stores the state rewards for each objective.
+// These are only relevant for LRA objectives for MAs (otherwise, they appear within the action rewards). For other objectives/models, the corresponding vector will be empty.
+std::vector<std::vector<ValueType>> stateRewards;
+// stores the indices of the objectives for which we need to compute the long run average values
+storm::storage::BitVector lraObjectives;
 // stores the indices of the objectives for which there is no upper time bound
 storm::storage::BitVector objectivesWithNoUpperTimeBound;
@@ -137,7 +142,7 @@ namespace storm {
std::vector
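
Taken together, the header now keeps, per objective, an action-reward vector, an optional state-reward vector, and an entry in the lraObjectives bit vector. The following small consistency check merely restates the invariants documented in the comments above (state rewards are only populated for LRA objectives on Markov automata; every LRA objective counts as an objective without upper time bound); it is an illustration, not part of the commit, and modelIsMarkovAutomaton is an assumed flag.

#include <cassert>
#include <cstddef>
#include <vector>

// Illustrative consistency check; plain vectors stand in for storm::storage::BitVector.
void checkRewardDataConsistency(std::vector<std::vector<double>> const& actionRewards,
                                std::vector<std::vector<double>> const& stateRewards,
                                std::vector<bool> const& lraObjectives,
                                std::vector<bool> const& objectivesWithNoUpperTimeBound,
                                bool modelIsMarkovAutomaton) {
    assert(actionRewards.size() == stateRewards.size());
    for (std::size_t objIndex = 0; objIndex < actionRewards.size(); ++objIndex) {
        // State rewards are only kept separately for LRA objectives on Markov automata; for all
        // other objectives/models they have been folded into the action rewards and stay empty.
        if (!stateRewards[objIndex].empty()) {
            assert(modelIsMarkovAutomaton && lraObjectives[objIndex]);
        }
        // Every LRA objective is treated as an objective without an upper time bound.
        if (lraObjectives[objIndex]) {
            assert(objectivesWithNoUpperTimeBound[objIndex]);
        }
    }
}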