Browse Source

improved multi-dimensional reward unfolding

tempestpy_adaptions
TimQu 7 years ago
parent
commit
354b3103f0
  1. 14
      src/storm/modelchecker/multiobjective/SparseMultiObjectivePreprocessor.cpp
  2. 82
      src/storm/modelchecker/multiobjective/pcaa/SparseMdpPcaaWeightVectorChecker.cpp
  3. 570
      src/storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.cpp
  4. 71
      src/storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.h

14
src/storm/modelchecker/multiobjective/SparseMultiObjectivePreprocessor.cpp

@ -267,7 +267,9 @@ namespace storm {
void SparseMultiObjectivePreprocessor<SparseModelType>::preprocessBoundedUntilFormula(storm::logic::BoundedUntilFormula const& formula, storm::logic::OperatorInformation const& opInfo, PreprocessorData& data) {
// Check how to handle this query
if (!formula.getTimeBoundReference().isRewardBound() && (!formula.hasLowerBound() || (!formula.isLowerBoundStrict() && storm::utility::isZero(formula.template getLowerBound<storm::RationalNumber>())))) {
if (formula.getDimension() != 1 || formula.getTimeBoundReference().isRewardBound()) {
data.objectives.back()->formula = std::make_shared<storm::logic::ProbabilityOperatorFormula>(formula.asSharedPointer(), opInfo);
} else if (!formula.hasLowerBound() || (!formula.isLowerBoundStrict() && storm::utility::isZero(formula.template getLowerBound<storm::RationalNumber>()))) {
std::shared_ptr<storm::logic::Formula const> subformula;
if (!formula.hasUpperBound()) {
// The formula is actually unbounded
@ -370,11 +372,7 @@ namespace storm {
std::set<std::string> relevantRewardModels;
for (auto const& obj : result.objectives) {
if (obj.formula->isRewardOperatorFormula()) {
relevantRewardModels.insert(obj.formula->asRewardOperatorFormula().getRewardModelName());
} else {
STORM_LOG_ASSERT(false, "Unknown formula type.");
}
obj.formula->gatherReferencedRewardModels(relevantRewardModels);
}
// Build a subsystem that discards states that yield infinite reward for all schedulers.
@ -437,7 +435,9 @@ namespace storm {
auto const& rewModel = result.preprocessedModel->getRewardModel(obj.formula->asRewardOperatorFormula().getRewardModelName());
zeroRewardChoices &= rewModel.getChoicesWithZeroReward(transitions);
} else {
STORM_LOG_ASSERT(false, "Unknown formula type.");
zeroRewardChoices.clear();
STORM_LOG_WARN("Formula type not handled properly");
//STORM_LOG_ASSERT(false, "Unknown formula type.");
}
}

82
src/storm/modelchecker/multiobjective/pcaa/SparseMdpPcaaWeightVectorChecker.cpp

@ -29,11 +29,13 @@ namespace storm {
// set the state action rewards. Also do some sanity checks on the objectives.
for (uint_fast64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
auto const& formula = *objectives[objIndex].formula;
STORM_LOG_THROW(formula.isRewardOperatorFormula() && formula.asRewardOperatorFormula().hasRewardModelName(), storm::exceptions::UnexpectedException, "Unexpected type of operator formula: " << formula);
STORM_LOG_THROW(formula.getSubformula().isCumulativeRewardFormula() || formula.getSubformula().isTotalRewardFormula(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
typename SparseMdpModelType::RewardModelType const& rewModel = this->model.getRewardModel(formula.asRewardOperatorFormula().getRewardModelName());
STORM_LOG_THROW(!rewModel.hasTransitionRewards(), storm::exceptions::NotSupportedException, "Reward model has transition rewards which is not expected.");
this->discreteActionRewards[objIndex] = rewModel.getTotalRewardVector(this->model.getTransitionMatrix());
if (!(formula.isProbabilityOperatorFormula() && formula.getSubformula().isBoundedUntilFormula())) {
STORM_LOG_THROW(formula.isRewardOperatorFormula() && formula.asRewardOperatorFormula().hasRewardModelName(), storm::exceptions::UnexpectedException, "Unexpected type of operator formula: " << formula);
STORM_LOG_THROW(formula.getSubformula().isCumulativeRewardFormula() || formula.getSubformula().isTotalRewardFormula(), storm::exceptions::UnexpectedException, "Unexpected type of sub-formula: " << formula.getSubformula());
typename SparseMdpModelType::RewardModelType const& rewModel = this->model.getRewardModel(formula.asRewardOperatorFormula().getRewardModelName());
STORM_LOG_THROW(!rewModel.hasTransitionRewards(), storm::exceptions::NotSupportedException, "Reward model has transition rewards which is not expected.");
this->discreteActionRewards[objIndex] = rewModel.getTotalRewardVector(this->model.getTransitionMatrix());
}
}
}
@ -135,11 +137,10 @@ namespace storm {
computeEpochSolution(epoch, weightVector);
}
auto solution = rewardUnfolding->getEpochSolution(initEpoch);
uint64_t initStateInUnfoldedModel = *rewardUnfolding->getModelForEpoch(initEpoch).initialStates.begin();
this->weightedResult[*this->model.getInitialStates().begin()] = solution.weightedValues[initStateInUnfoldedModel];
auto solution = rewardUnfolding->getInitialStateResult(initEpoch);
this->weightedResult[*this->model.getInitialStates().begin()] = solution.weightedValue;
for (uint64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
this->objectiveResults[objIndex][*this->model.getInitialStates().begin()] = solution.objectiveValues[objIndex][initStateInUnfoldedModel];
this->objectiveResults[objIndex][*this->model.getInitialStates().begin()] = solution.objectiveValues[objIndex];
// Todo: we currently assume precise results...
this->offsetsToUnderApproximation[objIndex] = storm::utility::zero<ValueType>();
this->offsetsToOverApproximation[objIndex] = storm::utility::zero<ValueType>();
@ -149,79 +150,74 @@ namespace storm {
template <class SparseMdpModelType>
void SparseMdpPcaaWeightVectorChecker<SparseMdpModelType>::computeEpochSolution(typename MultiDimensionalRewardUnfolding<ValueType>::Epoch const& epoch, std::vector<ValueType> const& weightVector) {
auto const& epochModel = rewardUnfolding->getModelForEpoch(epoch);
typename MultiDimensionalRewardUnfolding<ValueType>::EpochSolution result;
result.weightedValues.resize(epochModel.intermediateTransitions.getRowGroupCount());
auto const& epochModel = rewardUnfolding->setCurrentEpoch(epoch);
std::vector<typename MultiDimensionalRewardUnfolding<ValueType>::SolutionType> result(epochModel.epochMatrix.getRowGroupCount());
// Formulate a min-max equation system max(A*x+b)=x for the weighted sum of the objectives
std::vector<ValueType> b(epochModel.rewardChoices.size(), storm::utility::zero<ValueType>());
std::vector<ValueType> b(epochModel.epochMatrix.getRowCount(), storm::utility::zero<ValueType>());
for (uint64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
ValueType const& weight = weightVector[objIndex];
ValueType weight = storm::solver::minimize(this->objectives[objIndex].formula->getOptimalityType()) ? -weightVector[objIndex] : weightVector[objIndex];
if (!storm::utility::isZero(weight)) {
std::vector<ValueType> const& objectiveReward = epochModel.objectiveRewards[objIndex];
std::cout << "ObjRew filter for obj #" << objIndex << " is " << epochModel.objectiveRewardFilter[objIndex] << std::endl;
for (auto const& choice : epochModel.objectiveRewardFilter[objIndex]) {
b[choice] += weight * objectiveReward[choice];
}
}
}
for (auto const& choice : epochModel.rewardChoices) {
typename MultiDimensionalRewardUnfolding<ValueType>::Epoch const& step = epochModel.epochSteps[choice].get();
assert(step.size() == epoch.size());
typename MultiDimensionalRewardUnfolding<ValueType>::Epoch targetEpoch;
targetEpoch.reserve(epoch.size());
for (auto eIt = epoch.begin(), sIt = step.begin(); eIt != epoch.end(); ++eIt, ++sIt) {
targetEpoch.push_back(std::max(*eIt - *sIt, (int64_t) -1));
}
b[choice] = epochModel.rewardTransitions.multiplyRowWithVector(choice, rewardUnfolding->getEpochSolution(targetEpoch).weightedValues);
auto stepSolutionIt = epochModel.stepSolutions.begin();
for (auto const& choice : epochModel.stepChoices) {
b[choice] += stepSolutionIt->weightedValue;
++stepSolutionIt;
}
std::cout << "MinMax b is " << storm::utility::vector::toString(b) << std::endl;
// Invoke the min max solver
storm::solver::GeneralMinMaxLinearEquationSolverFactory<ValueType> minMaxSolverFactory;
auto minMaxSolver = minMaxSolverFactory.create(epochModel.intermediateTransitions);
auto minMaxSolver = minMaxSolverFactory.create(epochModel.epochMatrix);
minMaxSolver->setOptimizationDirection(storm::solver::OptimizationDirection::Maximize);
minMaxSolver->setTrackScheduler(true);
//minMaxSolver->setCachingEnabled(true);
minMaxSolver->solveEquations(result.weightedValues, b);
std::vector<ValueType> x(result.size(), storm::utility::zero<ValueType>());
minMaxSolver->solveEquations(x, b);
for (uint64_t state = 0; state < x.size(); ++state) {
result[state].weightedValue = x[state];
}
// Formulate for each objective the linear equation system induced by the performed choices
auto const& choices = minMaxSolver->getSchedulerChoices();
storm::storage::SparseMatrix<ValueType> subMatrix = epochModel.intermediateTransitions.selectRowsFromRowGroups(choices, true);
storm::storage::SparseMatrix<ValueType> subMatrix = epochModel.epochMatrix.selectRowsFromRowGroups(choices, true);
subMatrix.convertToEquationSystem();
storm::solver::GeneralLinearEquationSolverFactory<ValueType> linEqSolverFactory;
auto linEqSolver = linEqSolverFactory.create(std::move(subMatrix));
b.resize(choices.size());
result.objectiveValues.reserve(this->objectives.size());
// TODO: start with a better initial guess
x.resize(choices.size());
for (uint64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
std::vector<ValueType> const& objectiveReward = epochModel.objectiveRewards[objIndex];
// TODO: start with a better initial guess
result.objectiveValues.emplace_back(choices.size(), storm::utility::convertNumber<ValueType>(0.5));
for (uint64_t state = 0; state < choices.size(); ++state) {
uint64_t choice = epochModel.intermediateTransitions.getRowGroupIndices()[state] + choices[state];
uint64_t choice = epochModel.epochMatrix.getRowGroupIndices()[state] + choices[state];
if (epochModel.objectiveRewardFilter[objIndex].get(choice)) {
b[state] = objectiveReward[choice];
} else {
b[state] = storm::utility::zero<ValueType>();
}
if (epochModel.rewardChoices.get(choice)) {
typename MultiDimensionalRewardUnfolding<ValueType>::Epoch const& step = epochModel.epochSteps[choice].get();
assert(step.size() == epoch.size());
typename MultiDimensionalRewardUnfolding<ValueType>::Epoch targetEpoch;
targetEpoch.reserve(epoch.size());
for (auto eIt = epoch.begin(), sIt = step.begin(); eIt != epoch.end(); ++eIt, ++sIt) {
targetEpoch.push_back(std::max(*eIt - *sIt, (int64_t) -1));
}
b[state] += epochModel.rewardTransitions.multiplyRowWithVector(choice, rewardUnfolding->getEpochSolution(targetEpoch).objectiveValues[objIndex]);
if (epochModel.stepChoices.get(choice)) {
b[state] += epochModel.stepSolutions[epochModel.stepChoices.getNumberOfSetBitsBeforeIndex(choice)].objectiveValues[objIndex];
}
}
linEqSolver->solveEquations(result.objectiveValues.back(), b);
std::cout << "LinEq b is " << storm::utility::vector::toString(b) << std::endl;
linEqSolver->solveEquations(x, b);
for (uint64_t state = 0; state < choices.size(); ++state) {
result[state].objectiveValues.push_back(x[state]);
}
}
rewardUnfolding->setEpochSolution(epoch, std::move(result));
rewardUnfolding->setSolutionForCurrentEpoch(result);
}
template class SparseMdpPcaaWeightVectorChecker<storm::models::sparse::Mdp<double>>;
#ifdef STORM_HAVE_CARL
template class SparseMdpPcaaWeightVectorChecker<storm::models::sparse::Mdp<storm::RationalNumber>>;

570
src/storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.cpp

@ -8,6 +8,7 @@
#include "storm/modelchecker/propositional/SparsePropositionalModelChecker.h"
#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
#include "storm/transformer/EndComponentEliminator.h"
#include "storm/exceptions/UnexpectedException.h"
#include "storm/exceptions/IllegalArgumentException.h"
@ -28,6 +29,7 @@ namespace storm {
void MultiDimensionalRewardUnfolding<ValueType>::initialize() {
// collect the time-bounded subobjectives
std::vector<std::vector<uint64_t>> epochSteps;
for (uint64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
auto const& formula = *this->objectives[objIndex].formula;
if (formula.isProbabilityOperatorFormula()) {
@ -49,7 +51,7 @@ namespace storm {
}
memoryLabels.push_back(memLabel);
if (boundedUntilFormula.getTimeBoundReference(dim).isTimeBound() || boundedUntilFormula.getTimeBoundReference(dim).isStepBound()) {
scaledRewards.push_back(std::vector<uint64_t>(model.getNumberOfChoices(), 1));
epochSteps.push_back(std::vector<uint64_t>(model.getNumberOfChoices(), 1));
scalingFactors.push_back(storm::utility::one<ValueType>());
} else {
STORM_LOG_ASSERT(boundedUntilFormula.getTimeBoundReference(dim).isRewardBound(), "Unexpected type of time bound.");
@ -60,8 +62,8 @@ namespace storm {
std::vector<ValueType> actionRewards = rewardModel.getTotalRewardVector(this->model.getTransitionMatrix());
std::cout << "action rewards " << storm::utility::vector::toString(actionRewards) << std::endl;
auto discretizedRewardsAndFactor = storm::utility::vector::toIntegralVector<ValueType, uint64_t>(actionRewards);
scaledRewards.push_back(std::move(discretizedRewardsAndFactor.first));
std::cout << "scaled rewards " << storm::utility::vector::toString(scaledRewards.back()) << std::endl;
epochSteps.push_back(std::move(discretizedRewardsAndFactor.first));
std::cout << "scaled rewards " << storm::utility::vector::toString(epochSteps.back()) << std::endl;
scalingFactors.push_back(std::move(discretizedRewardsAndFactor.second));
}
}
@ -69,7 +71,7 @@ namespace storm {
}
} else if (formula.isRewardOperatorFormula() && formula.getSubformula().isCumulativeRewardFormula()) {
subObjectives.push_back(std::make_pair(formula.getSubformula().asSharedPointer(), objIndex));
scaledRewards.push_back(std::vector<uint64_t>(model.getNumberOfChoices(), 1));
epochSteps.push_back(std::vector<uint64_t>(model.getNumberOfChoices(), 1));
scalingFactors.push_back(storm::utility::one<ValueType>());
memoryLabels.push_back(boost::none);
}
@ -85,6 +87,89 @@ namespace storm {
}
objectiveDimensions.push_back(std::move(dimensions));
}
// collect which epoch steps are possible
possibleEpochSteps.clear();
for (uint64_t choiceIndex = 0; choiceIndex < epochSteps.front().size(); ++choiceIndex) {
Epoch step;
step.reserve(epochSteps.size());
for (auto const& dimensionRewards : epochSteps) {
step.push_back(dimensionRewards[choiceIndex]);
}
possibleEpochSteps.insert(step);
}
std::cout << "epoch steps are ..." << std::endl;
for (uint64_t dim = 0; dim < epochSteps.size(); ++dim) {
std::cout << " dim " << dim << " : " << storm::utility::vector::toString(epochSteps[dim]) << std::endl;
}
// build the model x memory product
auto memoryStructure = computeMemoryStructure();
memoryStateMap = computeMemoryStateMap(memoryStructure);
productBuilder = std::make_shared<storm::storage::SparseModelMemoryProduct<ValueType>>(memoryStructure.product(model));
// todo: we only need to build the reachable states + the full model for each memory state encoding that all subObjectives of an objective are irrelevant
productBuilder->setBuildFullProduct();
modelMemoryProduct = productBuilder->build()->template as<storm::models::sparse::Mdp<ValueType>>();
std::cout << "Orig model Transitions: " << std::endl << model.getTransitionMatrix() << std::endl;
std::cout << "Memory: " << std::endl << memoryStructure.toString() << std::endl;
std::cout << "Product transitions: " << std::endl << modelMemoryProduct->getTransitionMatrix() << std::endl;
productEpochSteps.resize(modelMemoryProduct->getNumberOfChoices());
for (uint64_t modelState = 0; modelState < model.getNumberOfStates(); ++modelState) {
uint64_t numChoices = model.getTransitionMatrix().getRowGroupSize(modelState);
uint64_t firstChoice = model.getTransitionMatrix().getRowGroupIndices()[modelState];
for (uint64_t choiceOffset = 0; choiceOffset < numChoices; ++choiceOffset) {
Epoch step;
bool isZeroStep = true;
for (uint64_t dim = 0; dim < epochSteps.size(); ++dim) {
step.push_back(epochSteps[dim][firstChoice + choiceOffset]);
isZeroStep = isZeroStep && step.back() == 0;
}
std::cout << "step #" << modelState << "." << choiceOffset << " is " << storm::utility::vector::toString(step) << std::endl;
if (!isZeroStep) {
std::cout << " (non-zero step)" << std::endl;
for (uint64_t memState = 0; memState < memoryStateMap.size(); ++memState) {
uint64_t productState = getProductState(modelState, memState);
uint64_t productChoice = modelMemoryProduct->getTransitionMatrix().getRowGroupIndices()[productState] + choiceOffset;
assert(productChoice < modelMemoryProduct->getTransitionMatrix().getRowGroupIndices()[productState + 1]);
productEpochSteps[productChoice] = step;
std::cout << " set step at product choice " << productChoice << std::endl;
}
}
}
}
modelStates.resize(modelMemoryProduct->getNumberOfStates());
memoryStates.resize(modelMemoryProduct->getNumberOfStates());
for (uint64_t modelState = 0; modelState < model.getNumberOfStates(); ++modelState) {
for (uint64_t memoryState = 0; memoryState < memoryStructure.getNumberOfStates(); ++memoryState) {
uint64_t productState = getProductState(modelState, memoryState);
modelStates[productState] = modelState;
memoryStates[productState] = memoryState;
}
}
productChoiceToStateMapping.clear();
productChoiceToStateMapping.reserve(modelMemoryProduct->getNumberOfChoices());
for (uint64_t productState = 0; productState < modelMemoryProduct->getNumberOfStates(); ++productState) {
uint64_t groupSize = modelMemoryProduct->getTransitionMatrix().getRowGroupSize(productState);
for (uint64_t i = 0; i < groupSize; ++i) {
productChoiceToStateMapping.push_back(productState);
}
}
productAllowedBottomStates = storm::storage::BitVector(modelMemoryProduct->getNumberOfStates(), true);
for (auto const& modelState : allowedBottomStates) {
for (uint64_t memoryState = 0; memoryState < memoryStateMap.size(); ++memoryState) {
productAllowedBottomStates.set(getProductState(modelState, memoryState), true);
}
}
}
template<typename ValueType>
@ -119,16 +204,6 @@ namespace storm {
template<typename ValueType>
std::vector<typename MultiDimensionalRewardUnfolding<ValueType>::Epoch> MultiDimensionalRewardUnfolding<ValueType>::getEpochComputationOrder(Epoch const& startEpoch) {
// collect which 'epoch steps' are possible
std::set<Epoch> steps;
for (uint64_t choiceIndex = 0; choiceIndex < scaledRewards.front().size(); ++choiceIndex) {
Epoch step;
step.reserve(scaledRewards.size());
for (auto const& dimensionRewards : scaledRewards) {
step.push_back(dimensionRewards[choiceIndex]);
}
steps.insert(step);
}
// perform DFS to get the 'reachable' epochs in the correct order.
std::vector<Epoch> result, dfsStack;
@ -137,7 +212,7 @@ namespace storm {
dfsStack.push_back(startEpoch);
while (!dfsStack.empty()) {
bool hasUnseenSuccessor = false;
for (auto const& step : steps) {
for (auto const& step : possibleEpochSteps) {
Epoch successorEpoch = getSuccessorEpoch(dfsStack.back(), step);
if (seenEpochs.find(successorEpoch) == seenEpochs.end()) {
seenEpochs.insert(successorEpoch);
@ -146,6 +221,7 @@ namespace storm {
}
}
if (!hasUnseenSuccessor) {
std::cout << "ADDING EPOCH to computation order: " << storm::utility::vector::toString(dfsStack.back()) << std::endl;
result.push_back(std::move(dfsStack.back()));
dfsStack.pop_back();
}
@ -153,111 +229,178 @@ namespace storm {
return result;
}
template<typename ValueType>
typename MultiDimensionalRewardUnfolding<ValueType>::EpochModel const& MultiDimensionalRewardUnfolding<ValueType>::getModelForEpoch(Epoch const& epoch) {
// Get the model for the considered class of epochs
EpochClass classOfEpoch = getClassOfEpoch(epoch);
auto findRes = epochModels.find(classOfEpoch);
std::shared_ptr<EpochModel> epochModel;
if (findRes != epochModels.end()) {
epochModel = findRes->second;
} else {
epochModel = epochModels.insert(std::make_pair(classOfEpoch, computeModelForEpoch(epoch))).first->second;
typename MultiDimensionalRewardUnfolding<ValueType>::EpochModel const& MultiDimensionalRewardUnfolding<ValueType>::setCurrentEpoch(Epoch const& epoch) {
std::cout << "Setting epoch to " << storm::utility::vector::toString(epoch) << std::endl;
// Check if we need to update the current epoch class
if (!currentEpoch || getClassOfEpoch(epoch) != getClassOfEpoch(currentEpoch.get())) {
setCurrentEpochClass(epoch);
}
// Filter out all objective rewards that we do not receive in this particular epoch
epochModel->objectiveRewardFilter.resize(objectives.size());
for (uint64_t objIndex = 0; objIndex < objectives.size(); ++objIndex) {
storm::storage::BitVector& filter = epochModel->objectiveRewardFilter[objIndex];
filter.resize(epochModel->objectiveRewards[objIndex].size());
if (objectiveDimensions[objIndex].empty()) {
filter.clear();
filter.complement();
} else {
for (uint64_t state = 0; state < epochModel->rewardTransitions.getRowGroupCount(); ++state) {
for (uint64_t choice = epochModel->rewardTransitions.getRowGroupIndices()[state]; choice < epochModel->rewardTransitions.getRowGroupIndices()[state + 1]; ++choice) {
for (auto const& dim : objectiveDimensions[objIndex]) {
auto successorEpoch = epoch[dim] - (epochModel->epochSteps[choice].is_initialized() ? epochModel->epochSteps[choice].get()[dim] : 0);
if (successorEpoch >= 0) {
filter.set(choice, true);
} else if (epochModel->relevantStates[dim].get(state)) {
filter.set(choice, false);
break;
} else {
filter.set(choice, true);
}
}
}
// Find out which objective rewards are earned in this particular epoch
epochModel.objectiveRewardFilter = std::vector<storm::storage::BitVector>(objectives.size(), storm::storage::BitVector(epochModel.objectiveRewards.front().size(), true));
for (auto const& reducedChoice : epochModel.stepChoices) {
uint64_t productChoice = ecElimResult.newToOldRowMapping[reducedChoice];
storm::storage::BitVector memoryState = convertMemoryState(getMemoryState(productChoiceToStateMapping[productChoice]));
Epoch successorEpoch = getSuccessorEpoch(epoch, productEpochSteps[productChoice].get());
for (uint64_t dim = 0; dim < successorEpoch.size(); ++dim) {
if (successorEpoch[dim] < 0 && memoryState.get(dim)) {
epochModel.objectiveRewardFilter[subObjectives[dim].second].set(reducedChoice, false);
}
}
}
std::cout << "Epoch model for epoch: " << storm::utility::vector::toString(epoch) << std::endl;
std::cout << "Reward choices in this epoch are " << epochModel->rewardChoices << std::endl;
// compute the solution for the stepChoices
epochModel.stepSolutions.resize(epochModel.stepChoices.getNumberOfSetBits());
auto stepSolIt = epochModel.stepSolutions.begin();
for (auto const& reducedChoice : epochModel.stepChoices) {
uint64_t productChoice = ecElimResult.newToOldRowMapping[reducedChoice];
SolutionType choiceSolution = getZeroSolution();
Epoch successorEpoch = getSuccessorEpoch(epoch, productEpochSteps[productChoice].get());
storm::storage::BitVector greaterZeroDimensions = storm::utility::vector::filter<int64_t>(successorEpoch, [] (int64_t const& e) -> bool { return e >= 0; });
for (auto const& successor : modelMemoryProduct->getTransitionMatrix().getRow(productChoice)) {
storm::storage::BitVector successorMemoryState = convertMemoryState(getMemoryState(successor.getColumn())) & greaterZeroDimensions;
uint64_t successorProductState = getProductState(getModelState(successor.getColumn()), convertMemoryState(successorMemoryState));
SolutionType const& successorSolution = getStateSolution(successorEpoch, successorProductState);
addScaledSolution(choiceSolution, successorSolution, successor.getValue());
}
*stepSolIt = std::move(choiceSolution);
++stepSolIt;
}
assert(epochModel.objectiveRewards.size() == objectives.size());
assert(epochModel.objectiveRewardFilter.size() == objectives.size());
assert(epochModel.epochMatrix.getRowCount() == epochModel.stepChoices.size());
assert(epochModel.stepChoices.size() == epochModel.objectiveRewards.front().size());
assert(epochModel.objectiveRewards.front().size() == epochModel.objectiveRewards.back().size());
assert(epochModel.objectiveRewards.front().size() == epochModel.objectiveRewardFilter.front().size());
assert(epochModel.objectiveRewards.back().size() == epochModel.objectiveRewardFilter.back().size());
assert(epochModel.stepChoices.getNumberOfSetBits() == epochModel.stepSolutions.size());
currentEpoch = epoch;
std::cout << "epoch matrix is " << std::endl << epochModel.epochMatrix << std::endl;
return epochModel;
return *epochModel;
}
template<typename ValueType>
std::shared_ptr<typename MultiDimensionalRewardUnfolding<ValueType>::EpochModel> MultiDimensionalRewardUnfolding<ValueType>::computeModelForEpoch(Epoch const& epoch) {
void MultiDimensionalRewardUnfolding<ValueType>::setCurrentEpochClass(Epoch const& epoch) {
std::cout << "Setting class for epoch " << storm::utility::vector::toString(epoch) << std::endl;
storm::storage::MemoryStructure memory = computeMemoryStructureForEpoch(epoch);
auto modelMemoryProductBuilder = memory.product(model);
modelMemoryProductBuilder.setBuildFullProduct();
auto modelMemoryProduct = modelMemoryProductBuilder.build()->template as<storm::models::sparse::Mdp<ValueType>>();
auto productObjectiveRewards = computeObjectiveRewardsForProduct(epoch);
storm::storage::SparseMatrix<ValueType> const& allTransitions = modelMemoryProduct->getTransitionMatrix();
std::shared_ptr<EpochModel> result = std::make_shared<EpochModel>();
result->rewardChoices = storm::storage::BitVector(allTransitions.getRowCount(), false);
result->epochSteps.resize(modelMemoryProduct->getNumberOfChoices());
for (uint64_t modelState = 0; modelState < model.getNumberOfStates(); ++modelState) {
uint64_t numChoices = model.getTransitionMatrix().getRowGroupSize(modelState);
uint64_t firstChoice = model.getTransitionMatrix().getRowGroupIndices()[modelState];
for (uint64_t choiceOffset = 0; choiceOffset < numChoices; ++choiceOffset) {
Epoch step;
bool isZeroStep = true;
for (uint64_t dim = 0; dim < epoch.size(); ++dim) {
std::cout << " scaled rewards " << storm::utility::vector::toString(scaledRewards[dim]) << std::endl;
step.push_back(scaledRewards[dim][firstChoice + choiceOffset]);
isZeroStep = isZeroStep && (step.back() == 0 || epoch[dim] < 0);
}
if (!isZeroStep) {
for (uint64_t memState = 0; memState < memory.getNumberOfStates(); ++memState) {
uint64_t productState = modelMemoryProductBuilder.getResultState(modelState, memState);
uint64_t productChoice = allTransitions.getRowGroupIndices()[productState] + choiceOffset;
assert(productChoice < allTransitions.getRowGroupIndices()[productState + 1]);
result->epochSteps[productChoice] = step;
result->rewardChoices.set(productChoice, true);
storm::storage::BitVector stepChoices(modelMemoryProduct->getNumberOfChoices(), false);
uint64_t choice = 0;
for (auto const& step : productEpochSteps) {
if (step) {
std::cout << " step at choice " << choice << " is " << storm::utility::vector::toString(step.get()) << std::endl;
auto eIt = epoch.begin();
for (auto const& s : step.get()) {
if (s != 0 && *eIt >= 0) {
stepChoices.set(choice, true);
break;
}
++eIt;
}
}
++choice;
}
std::cout << "step choices is " << stepChoices << std::endl;
std::cout << "Matrix before filter: " << std::endl << modelMemoryProduct->getTransitionMatrix() << std::endl << std::endl;
epochModel.epochMatrix = modelMemoryProduct->getTransitionMatrix().filterEntries(~stepChoices);
result->rewardTransitions = allTransitions.filterEntries(result->rewardChoices);
result->intermediateTransitions = allTransitions.filterEntries(~result->rewardChoices);
result->objectiveRewards = computeObjectiveRewardsForEpoch(epoch, modelMemoryProduct);
storm::modelchecker::SparsePropositionalModelChecker<storm::models::sparse::Mdp<ValueType>> mc(*modelMemoryProduct);
result->relevantStates.reserve(subObjectives.size());
for (auto const& relevantStatesLabel : memoryLabels) {
if (relevantStatesLabel) {
auto relevantStatesFormula = std::make_shared<storm::logic::AtomicLabelFormula>(relevantStatesLabel.get());
result->relevantStates.push_back(mc.check(*relevantStatesFormula)->asExplicitQualitativeCheckResult().getTruthValuesVector());
} else {
result->relevantStates.push_back(storm::storage::BitVector(modelMemoryProduct->getNumberOfStates(), true));
storm::storage::BitVector zeroObjRewardChoices(modelMemoryProduct->getNumberOfChoices(), true);
for (auto const& objRewards : productObjectiveRewards) {
std::cout << "objectiveRewards: " << storm::utility::vector::toString(objRewards) << std::endl;
zeroObjRewardChoices &= storm::utility::vector::filterZero(objRewards);
}
std::cout << "Matrix before ec elim: " << std::endl << epochModel.epochMatrix << std::endl << std::endl;
std::cout << "zeroObjRewChoices: " << zeroObjRewardChoices << std::endl;
std::cout << "productAllowedBottomStates: " << productAllowedBottomStates << std::endl;
ecElimResult = storm::transformer::EndComponentEliminator<ValueType>::transform(epochModel.epochMatrix, storm::storage::BitVector(modelMemoryProduct->getNumberOfStates(), true), zeroObjRewardChoices & ~stepChoices, productAllowedBottomStates);
epochModel.epochMatrix = std::move(ecElimResult.matrix);
std::cout << "matrix redu: " << std::endl << epochModel.epochMatrix << std::endl;
epochModel.stepChoices = storm::storage::BitVector(epochModel.epochMatrix.getRowCount(), false);
for (uint64_t choice = 0; choice < epochModel.epochMatrix.getRowCount(); ++choice) {
if (stepChoices.get(ecElimResult.newToOldRowMapping[choice])) {
epochModel.stepChoices.set(choice, true);
}
}
std::cout << "step choices orig: " << stepChoices << std::endl;
std::cout << "step choices redu: " << epochModel.stepChoices << std::endl;
STORM_LOG_ASSERT(epochModel.stepChoices.getNumberOfSetBits() == stepChoices.getNumberOfSetBits(), "The number of choices leading outside of the epoch does not match for the reduced and unreduced epochMatrix");
result->initialStates = modelMemoryProduct->getInitialStates();
epochModel.objectiveRewards.clear();
for (auto const& productObjRew : productObjectiveRewards) {
std::vector<ValueType> reducedModelObjRewards;
reducedModelObjRewards.reserve(epochModel.epochMatrix.getRowCount());
for (auto const& productChoice : ecElimResult.newToOldRowMapping) {
reducedModelObjRewards.push_back(productObjRew[productChoice]);
}
epochModel.objectiveRewards.push_back(std::move(reducedModelObjRewards));
}
return result;
}
template<typename ValueType>
storm::storage::MemoryStructure MultiDimensionalRewardUnfolding<ValueType>::computeMemoryStructureForEpoch(Epoch const& epoch) const {
typename MultiDimensionalRewardUnfolding<ValueType>::SolutionType MultiDimensionalRewardUnfolding<ValueType>::getZeroSolution() const {
SolutionType res;
res.weightedValue = storm::utility::zero<ValueType>();
res.objectiveValues = std::vector<ValueType>(objectives.size(), storm::utility::zero<ValueType>());
return res;
}
template<typename ValueType>
void MultiDimensionalRewardUnfolding<ValueType>::addScaledSolution(SolutionType& solution, SolutionType const& solutionToAdd, ValueType const& scalingFactor) const {
solution.weightedValue += solutionToAdd.weightedValue * scalingFactor;
storm::utility::vector::addScaledVector(solution.objectiveValues, solutionToAdd.objectiveValues, scalingFactor);
}
template<typename ValueType>
void MultiDimensionalRewardUnfolding<ValueType>::setSolutionForCurrentEpoch(std::vector<SolutionType> const& reducedModelStateSolutions) {
for (uint64_t productState = 0; productState < modelMemoryProduct->getNumberOfStates(); ++productState) {
uint64_t reducedModelState = ecElimResult.oldToNewStateMapping[productState];
if (reducedModelState < reducedModelStateSolutions.size()) {
setSolutionForCurrentEpoch(productState, reducedModelStateSolutions[reducedModelState]);
}
}
}
template<typename ValueType>
void MultiDimensionalRewardUnfolding<ValueType>::setSolutionForCurrentEpoch(uint64_t const& productState, SolutionType const& solution) {
STORM_LOG_ASSERT(currentEpoch, "Tried to set a solution for the current epoch, but no epoch was specified before.");
std::vector<int64_t> solutionKey = currentEpoch.get();
solutionKey.push_back(productState);
solutions[solutionKey] = solution;
}
template<typename ValueType>
typename MultiDimensionalRewardUnfolding<ValueType>::SolutionType const& MultiDimensionalRewardUnfolding<ValueType>::getStateSolution(Epoch const& epoch, uint64_t const& productState) const {
std::vector<int64_t> solutionKey = epoch;
solutionKey.push_back(productState);
auto solutionIt = solutions.find(solutionKey);
STORM_LOG_ASSERT(solutionIt != solutions.end(), "Requested unexisting solution for epoch " << storm::utility::vector::toString(epoch) << ".");
return solutionIt->second;
}
template<typename ValueType>
typename MultiDimensionalRewardUnfolding<ValueType>::SolutionType const& MultiDimensionalRewardUnfolding<ValueType>::getInitialStateResult(Epoch const& epoch) const {
return getStateSolution(epoch, *modelMemoryProduct->getInitialStates().begin());
}
template<typename ValueType>
storm::storage::MemoryStructure MultiDimensionalRewardUnfolding<ValueType>::computeMemoryStructure() const {
storm::modelchecker::SparsePropositionalModelChecker<storm::models::sparse::Mdp<ValueType>> mc(model);
@ -268,6 +411,82 @@ namespace storm {
continue;
}
std::vector<uint64_t> dimensionIndexMap;
for (auto const& globalDimensionIndex : objectiveDimensions[objIndex]) {
dimensionIndexMap.push_back(globalDimensionIndex);
}
// collect the memory states for this objective
std::vector<storm::storage::BitVector> objMemStates;
storm::storage::BitVector m(dimensionIndexMap.size(), false);
for (; !m.full(); m.increment()) {
objMemStates.push_back(~m);
}
objMemStates.push_back(~m);
assert(objMemStates.size() == 1ull << dimensionIndexMap.size());
// build objective memory
auto objMemoryBuilder = storm::storage::MemoryStructureBuilder<ValueType>(objMemStates.size(), model);
// Get the set of states that for all subobjectives satisfy either the left or the right subformula
storm::storage::BitVector constraintStates(model.getNumberOfStates(), true);
for (auto const& dim : objectiveDimensions[objIndex]) {
auto const& subObj = subObjectives[dim];
STORM_LOG_ASSERT(subObj.first->isBoundedUntilFormula(), "Unexpected Formula type");
constraintStates &=
(mc.check(subObj.first->asBoundedUntilFormula().getLeftSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector() |
mc.check(subObj.first->asBoundedUntilFormula().getRightSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector());
}
// Build the transitions between the memory states
for (uint64_t memState = 0; memState < objMemStates.size(); ++memState) {
auto const& memStateBV = objMemStates[memState];
for (uint64_t memStatePrime = 0; memStatePrime < objMemStates.size(); ++memStatePrime) {
auto const& memStatePrimeBV = objMemStates[memStatePrime];
if (memStatePrimeBV.isSubsetOf(memStateBV)) {
std::shared_ptr<storm::logic::Formula const> transitionFormula = storm::logic::Formula::getTrueFormula();
for (auto const& subObjIndex : memStateBV) {
std::shared_ptr<storm::logic::Formula const> subObjFormula = subObjectives[dimensionIndexMap[subObjIndex]].first->asBoundedUntilFormula().getRightSubformula().asSharedPointer();
if (memStatePrimeBV.get(subObjIndex)) {
subObjFormula = std::make_shared<storm::logic::UnaryBooleanStateFormula>(storm::logic::UnaryBooleanStateFormula::OperatorType::Not, subObjFormula);
}
transitionFormula = std::make_shared<storm::logic::BinaryBooleanStateFormula>(storm::logic::BinaryBooleanStateFormula::OperatorType::And, transitionFormula, subObjFormula);
}
storm::storage::BitVector transitionStates = mc.check(*transitionFormula)->asExplicitQualitativeCheckResult().getTruthValuesVector();
if (memStatePrimeBV.empty()) {
transitionStates |= ~constraintStates;
} else {
transitionStates &= constraintStates;
}
objMemoryBuilder.setTransition(memState, memStatePrime, transitionStates);
// Set the initial states
if (memStateBV.full()) {
storm::storage::BitVector initialTransitionStates = model.getInitialStates() & transitionStates;
// At this point we can check whether there is an initial state that already satisfies all subObjectives.
// Such a situation is not supported as we can not reduce this (easily) to an expected reward computation.
STORM_LOG_THROW(!memStatePrimeBV.empty() || initialTransitionStates.empty() || initialTransitionStates.isDisjointFrom(constraintStates), storm::exceptions::NotSupportedException, "The objective " << *objectives[objIndex].formula << " is already satisfied in an initial state. This special case is not supported.");
for (auto const& initState : initialTransitionStates) {
objMemoryBuilder.setInitialMemoryState(initState, memStatePrime);
}
}
}
}
}
// Build the memory labels
for (uint64_t memState = 0; memState < objMemStates.size(); ++memState) {
auto const& memStateBV = objMemStates[memState];
for (auto const& subObjIndex : memStateBV) {
objMemoryBuilder.setLabel(memState, memoryLabels[dimensionIndexMap[subObjIndex]].get());
}
}
/* Old (wrong.. ) implementation
storm::storage::MemoryStructure objMemory = storm::storage::MemoryStructureBuilder<ValueType>::buildTrivialMemoryStructure(model);
for (auto const& dim : objectiveDimensions[objIndex]) {
@ -276,52 +495,98 @@ namespace storm {
// Create a memory structure that stores whether a PsiState has already been reached
storm::storage::MemoryStructureBuilder<ValueType> subObjMemBuilder(2, model);
subObjMemBuilder.setLabel(0, memoryLabels[dim].get());
storm::storage::BitVector leftSubformulaResult = mc.check(subObj.first->asBoundedUntilFormula().getLeftSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector();
storm::storage::BitVector rightSubformulaResult = mc.check(subObj.first->asBoundedUntilFormula().getRightSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector();
if (epoch[dim] >= 0) {
storm::storage::BitVector rightSubformulaResult = mc.check(subObj.first->asBoundedUntilFormula().getRightSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector();
subObjMemBuilder.setTransition(0, 0, ~rightSubformulaResult);
subObjMemBuilder.setTransition(0, 1, rightSubformulaResult);
subObjMemBuilder.setTransition(1, 1, storm::storage::BitVector(model.getNumberOfStates(), true));
for (auto const& initState : model.getInitialStates()) {
subObjMemBuilder.setInitialMemoryState(initState, rightSubformulaResult.get(initState) ? 1 : 0);
}
} else {
subObjMemBuilder.setTransition(0, 0, storm::storage::BitVector(model.getNumberOfStates(), true));
subObjMemBuilder.setTransition(1, 1, storm::storage::BitVector(model.getNumberOfStates(), true));
subObjMemBuilder.setTransition(0, 0, leftSubformulaResult & ~rightSubformulaResult);
subObjMemBuilder.setTransition(0, 1, rightSubformulaResult);
subObjMemBuilder.setTransition(1, 1, storm::storage::BitVector(model.getNumberOfStates(), true));
for (auto const& initState : model.getInitialStates()) {
subObjMemBuilder.setInitialMemoryState(initState, rightSubformulaResult.get(initState) ? 1 : 0);
}
storm::storage::MemoryStructure subObjMem = subObjMemBuilder.build();
std::cout << " subObjMem: " << std::endl << subObjMem.toString() << std::endl;
objMemory = objMemory.product(subObjMem);
}
}
std::cout << " objMemory before: " << std::endl << objMemory.toString() << std::endl;
// find the memory state that represents that none of the subobjective is relative anymore
storm::storage::BitVector sinkStates(objMemory.getNumberOfStates(), true);
// find the memory state that represents that all subObjectives are decided (i.e., Psi_i has been reached for all i)
storm::storage::BitVector decidedState(objMemory.getNumberOfStates(), true);
for (auto const& dim : objectiveDimensions[objIndex]) {
sinkStates = sinkStates & ~objMemory.getStateLabeling().getStates(memoryLabels[dim].get());
decidedState = decidedState & ~objMemory.getStateLabeling().getStates(memoryLabels[dim].get());
}
assert(sinkStates.getNumberOfSetBits() == 1);
assert(decidedState.getNumberOfSetBits() == 1);
// When a constraint of at least one until formula is violated, we need to switch to the sink memory state
// When the set of PhiStates is left for at least one until formula, we switch to the decidedState
storm::storage::MemoryStructureBuilder<ValueType> objMemBuilder(objMemory, model);
for (auto const& dim : objectiveDimensions[objIndex]) {
auto const& subObj = subObjectives[dim];
storm::storage::BitVector constraintModelStates =
~(mc.check(subObj.first->asBoundedUntilFormula().getLeftSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector() |
mc.check(subObj.first->asBoundedUntilFormula().getLeftSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector());
mc.check(subObj.first->asBoundedUntilFormula().getLeftSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector() |
mc.check(subObj.first->asBoundedUntilFormula().getRightSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector();
for (auto const& maybeState : objMemory.getStateLabeling().getStates(memoryLabels[dim].get())) {
objMemBuilder.setTransition(maybeState, *sinkStates.begin(), constraintModelStates);
objMemBuilder.setTransition(maybeState, *decidedState.begin(), ~constraintModelStates);
}
}
objMemory = objMemBuilder.build();
*/
auto objMemory = objMemoryBuilder.build();
std::cout << " objMemory : " << std::endl << objMemory.toString() << std::endl;
memory = memory.product(objMemory);
}
std::cout << " final memory: " << std::endl << memory.toString() << std::endl;
return memory;
}
template<typename ValueType>
std::vector<storm::storage::BitVector> MultiDimensionalRewardUnfolding<ValueType>::computeMemoryStateMap(storm::storage::MemoryStructure const& memory) const {
std::vector<storm::storage::BitVector> result;
for (uint64_t memState = 0; memState < memory.getNumberOfStates(); ++memState) {
storm::storage::BitVector relevantSubObjectives(subObjectives.size(), false);
std::set<std::string> stateLabels = memory.getStateLabeling().getLabelsOfState(memState);
for (uint64_t dim = 0; dim < subObjectives.size(); ++dim) {
if (memoryLabels[dim] && stateLabels.find(memoryLabels[dim].get()) != stateLabels.end()) {
relevantSubObjectives.set(dim, true);
}
}
result.push_back(std::move(relevantSubObjectives));
}
return result;
}
template<typename ValueType>
storm::storage::BitVector const& MultiDimensionalRewardUnfolding<ValueType>::convertMemoryState(uint64_t const& memoryState) const {
return memoryStateMap[memoryState];
}
template<typename ValueType>
uint64_t MultiDimensionalRewardUnfolding<ValueType>::convertMemoryState(storm::storage::BitVector const& memoryState) const {
auto memStateIt = std::find(memoryStateMap.begin(), memoryStateMap.end(), memoryState);
return memStateIt - memoryStateMap.begin();
}
template<typename ValueType>
uint64_t MultiDimensionalRewardUnfolding<ValueType>::getProductState(uint64_t const& modelState, uint64_t const& memoryState) const {
uint64_t productState = productBuilder->getResultState(modelState, memoryState);
STORM_LOG_ASSERT(productState < modelMemoryProduct->getNumberOfStates(), "There is no state in the model-memory-product that corresponds to model state " << modelState << " and memory state " << memoryState << ".");
return productState;
}
template<typename ValueType>
uint64_t MultiDimensionalRewardUnfolding<ValueType>::getModelState(uint64_t const& productState) const {
return modelStates[productState];
}
template<typename ValueType>
std::vector<std::vector<ValueType>> MultiDimensionalRewardUnfolding<ValueType>::computeObjectiveRewardsForEpoch(Epoch const& epoch, std::shared_ptr<storm::models::sparse::Mdp<ValueType>> const& modelMemoryProduct) const {
uint64_t MultiDimensionalRewardUnfolding<ValueType>::getMemoryState(uint64_t const& productState) const {
return memoryStates[productState];
}
template<typename ValueType>
std::vector<std::vector<ValueType>> MultiDimensionalRewardUnfolding<ValueType>::computeObjectiveRewardsForProduct(Epoch const& epoch) const {
std::vector<std::vector<ValueType>> objectiveRewards;
objectiveRewards.reserve(objectives.size());
@ -350,35 +615,45 @@ namespace storm {
while (!relevantObjectives.full()) {
relevantObjectives.increment();
std::shared_ptr<storm::logic::Formula const> relevantStatesFormula;
std::shared_ptr<storm::logic::Formula const> goalStatesFormula = storm::logic::CloneVisitor().clone(*sinkStatesFormula);
for (uint64_t subObjIndex = 0; subObjIndex < dimensionIndexMap.size(); ++subObjIndex) {
std::shared_ptr<storm::logic::Formula> memLabelFormula = std::make_shared<storm::logic::AtomicLabelFormula>(memoryLabels[dimensionIndexMap[subObjIndex]].get());
if (relevantObjectives.get(subObjIndex)) {
auto rightSubFormula = subObjectives[dimensionIndexMap[subObjIndex]].first->asBoundedUntilFormula().getRightSubformula().asSharedPointer();
goalStatesFormula = std::make_shared<storm::logic::BinaryBooleanStateFormula>(storm::logic::BinaryBooleanStateFormula::OperatorType::And, goalStatesFormula, rightSubFormula);
} else {
memLabelFormula = std::make_shared<storm::logic::UnaryBooleanStateFormula>(storm::logic::UnaryBooleanStateFormula::OperatorType::Not, memLabelFormula);
}
if (relevantStatesFormula) {
relevantStatesFormula = std::make_shared<storm::logic::BinaryBooleanStateFormula>(storm::logic::BinaryBooleanStateFormula::OperatorType::And, relevantStatesFormula, memLabelFormula);
} else {
relevantStatesFormula = memLabelFormula;
// find out whether objective reward should be earned within this epoch
bool collectRewardInEpoch = true;
for (auto const& subObjIndex : relevantObjectives) {
if (epoch[dimensionIndexMap[subObjIndex]] < 0) {
collectRewardInEpoch = false;
break;
}
}
storm::storage::BitVector relevantStates = mc.check(*relevantStatesFormula)->asExplicitQualitativeCheckResult().getTruthValuesVector();
storm::storage::BitVector goalStates = mc.check(*goalStatesFormula)->asExplicitQualitativeCheckResult().getTruthValuesVector();
storm::utility::vector::addVectors(objRew, modelMemoryProduct->getTransitionMatrix().getConstrainedRowGroupSumVector(relevantStates, goalStates), objRew);
if (collectRewardInEpoch) {
std::shared_ptr<storm::logic::Formula const> relevantStatesFormula;
std::shared_ptr<storm::logic::Formula const> goalStatesFormula = storm::logic::CloneVisitor().clone(*sinkStatesFormula);
for (uint64_t subObjIndex = 0; subObjIndex < dimensionIndexMap.size(); ++subObjIndex) {
std::shared_ptr<storm::logic::Formula> memLabelFormula = std::make_shared<storm::logic::AtomicLabelFormula>(memoryLabels[dimensionIndexMap[subObjIndex]].get());
if (relevantObjectives.get(subObjIndex)) {
auto rightSubFormula = subObjectives[dimensionIndexMap[subObjIndex]].first->asBoundedUntilFormula().getRightSubformula().asSharedPointer();
goalStatesFormula = std::make_shared<storm::logic::BinaryBooleanStateFormula>(storm::logic::BinaryBooleanStateFormula::OperatorType::And, goalStatesFormula, rightSubFormula);
} else {
memLabelFormula = std::make_shared<storm::logic::UnaryBooleanStateFormula>(storm::logic::UnaryBooleanStateFormula::OperatorType::Not, memLabelFormula);
}
if (relevantStatesFormula) {
relevantStatesFormula = std::make_shared<storm::logic::BinaryBooleanStateFormula>(storm::logic::BinaryBooleanStateFormula::OperatorType::And, relevantStatesFormula, memLabelFormula);
} else {
relevantStatesFormula = memLabelFormula;
}
}
storm::storage::BitVector relevantStates = mc.check(*relevantStatesFormula)->asExplicitQualitativeCheckResult().getTruthValuesVector();
storm::storage::BitVector relevantChoices = modelMemoryProduct->getTransitionMatrix().getRowFilter(relevantStates);
storm::storage::BitVector goalStates = mc.check(*goalStatesFormula)->asExplicitQualitativeCheckResult().getTruthValuesVector();
for (auto const& choice : relevantChoices) {
objRew[choice] += modelMemoryProduct->getTransitionMatrix().getConstrainedRowSum(choice, goalStates);
}
}
}
objectiveRewards.push_back(std::move(objRew));
// TODO
// Check if the formula is already satisfied in the initial state
// STORM_LOG_THROW((data.originalModel.getInitialStates() & rightSubformulaResult).empty(), storm::exceptions::NotImplementedException, "The Probability for the objective " << *data.objectives.back()->originalFormula << " is always one as the rhs of the until formula is true in the initial state. This (trivial) case is currently not implemented.");
} else if (formula.isRewardOperatorFormula()) {
auto const& rewModel = modelMemoryProduct->getRewardModel(formula.asRewardOperatorFormula().getRewardModelName());
STORM_LOG_THROW(!rewModel.hasTransitionRewards(), storm::exceptions::NotSupportedException, "Reward model has transition rewards which is not expected.");
@ -402,21 +677,6 @@ namespace storm {
return objectiveRewards;
}
template<typename ValueType>
void MultiDimensionalRewardUnfolding<ValueType>::setEpochSolution(Epoch const& epoch, EpochSolution const& solution) {
epochSolutions.emplace(epoch, solution);
}
template<typename ValueType>
void MultiDimensionalRewardUnfolding<ValueType>::setEpochSolution(Epoch const& epoch, EpochSolution&& solution) {
epochSolutions.emplace(epoch, std::move(solution));
}
template<typename ValueType>
typename MultiDimensionalRewardUnfolding<ValueType>::EpochSolution const& MultiDimensionalRewardUnfolding<ValueType>::getEpochSolution(Epoch const& epoch) {
return epochSolutions.at(epoch);
}
template<typename ValueType>
typename MultiDimensionalRewardUnfolding<ValueType>::EpochClass MultiDimensionalRewardUnfolding<ValueType>::getClassOfEpoch(Epoch const& epoch) const {
// Get a BitVector that is 1 wherever the epoch is non-negative

71
src/storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.h

@ -8,6 +8,9 @@
#include "storm/models/sparse/Mdp.h"
#include "storm/utility/vector.h"
#include "storm/storage/memorystructure/MemoryStructure.h"
#include "storm/storage/memorystructure/SparseModelMemoryProduct.h"
#include "storm/transformer/EndComponentEliminator.h"
namespace storm {
namespace modelchecker {
@ -21,23 +24,20 @@ namespace storm {
typedef std::vector<int64_t> Epoch; // The number of reward steps that are "left" for each dimension
typedef uint64_t EpochClass; // Collection of epochs that consider the same epoch model
struct SolutionType {
ValueType weightedValue;
std::vector<ValueType> objectiveValues;
};
struct EpochModel {
storm::storage::SparseMatrix<ValueType> rewardTransitions;
storm::storage::SparseMatrix<ValueType> intermediateTransitions;
storm::storage::BitVector rewardChoices;
std::vector<boost::optional<Epoch>> epochSteps;
storm::storage::SparseMatrix<ValueType> epochMatrix;
storm::storage::BitVector stepChoices;
std::vector<SolutionType> stepSolutions;
std::vector<std::vector<ValueType>> objectiveRewards;
std::vector<storm::storage::BitVector> objectiveRewardFilter;
std::vector<storm::storage::BitVector> relevantStates;
storm::storage::BitVector initialStates;
};
struct EpochSolution {
std::vector<ValueType> weightedValues;
std::vector<std::vector<ValueType>> objectiveValues;
};
/*
*
* @param model The (preprocessed) model
@ -54,36 +54,63 @@ namespace storm {
Epoch getStartEpoch();
std::vector<Epoch> getEpochComputationOrder(Epoch const& startEpoch);
EpochModel const& getModelForEpoch(Epoch const& epoch);
void setEpochSolution(Epoch const& epoch, EpochSolution const& solution);
void setEpochSolution(Epoch const& epoch, EpochSolution&& solution);
EpochSolution const& getEpochSolution(Epoch const& epoch);
EpochModel const& setCurrentEpoch(Epoch const& epoch);
void setSolutionForCurrentEpoch(std::vector<SolutionType> const& reducedModelStateSolutions);
SolutionType const& getInitialStateResult(Epoch const& epoch) const;
private:
void setCurrentEpochClass(Epoch const& epoch);
void initialize();
EpochClass getClassOfEpoch(Epoch const& epoch) const;
Epoch getSuccessorEpoch(Epoch const& epoch, Epoch const& step) const;
std::shared_ptr<EpochModel> computeModelForEpoch(Epoch const& epoch);
storm::storage::MemoryStructure computeMemoryStructureForEpoch(Epoch const& epoch) const;
std::vector<std::vector<ValueType>> computeObjectiveRewardsForEpoch(Epoch const& epoch, std::shared_ptr<storm::models::sparse::Mdp<ValueType>> const& modelMemoryProduct) const;
std::vector<std::vector<ValueType>> computeObjectiveRewardsForProduct(Epoch const& epoch) const;
storm::storage::MemoryStructure computeMemoryStructure() const;
std::vector<storm::storage::BitVector> computeMemoryStateMap(storm::storage::MemoryStructure const& memory) const;
storm::storage::BitVector const& convertMemoryState(uint64_t const& memoryState) const;
uint64_t convertMemoryState(storm::storage::BitVector const& memoryState) const;
uint64_t getProductState(uint64_t const& modelState, uint64_t const& memoryState) const;
uint64_t getModelState(uint64_t const& productState) const;
uint64_t getMemoryState(uint64_t const& productState) const;
SolutionType getZeroSolution() const;
void addScaledSolution(SolutionType& solution, SolutionType const& solutionToAdd, ValueType const& scalingFactor) const;
void setSolutionForCurrentEpoch(uint64_t const& productState, SolutionType const& solution);
SolutionType const& getStateSolution(Epoch const& epoch, uint64_t const& productState) const;
storm::models::sparse::Mdp<ValueType> const& model;
std::vector<storm::modelchecker::multiobjective::Objective<ValueType>> const& objectives;
storm::storage::BitVector possibleECActions;
storm::storage::BitVector allowedBottomStates;
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> modelMemoryProduct;
std::shared_ptr<storm::storage::SparseModelMemoryProduct<ValueType>> productBuilder;
std::vector<storm::storage::BitVector> memoryStateMap;
std::vector<boost::optional<Epoch>> productEpochSteps;
storm::storage::BitVector productAllowedBottomStates;
std::vector<uint64_t> modelStates;
std::vector<uint64_t> memoryStates;
std::vector<uint64_t> productChoiceToStateMapping;
typename storm::transformer::EndComponentEliminator<ValueType>::EndComponentEliminatorReturnType ecElimResult;
std::set<Epoch> possibleEpochSteps;
EpochModel epochModel;
boost::optional<Epoch> currentEpoch;
std::vector<storm::storage::BitVector> objectiveDimensions;
std::vector<std::pair<std::shared_ptr<storm::logic::Formula const>, uint64_t>> subObjectives;
std::vector<boost::optional<std::string>> memoryLabels;
std::vector<std::vector<uint64_t>> scaledRewards;
std::vector<ValueType> scalingFactors;
std::map<EpochClass, std::shared_ptr<EpochModel>> epochModels;
std::map<Epoch, EpochSolution> epochSolutions;
std::map<std::vector<int64_t>, SolutionType> solutions;
};
}
}
Loading…
Cancel
Save