// (extraction metadata removed: "679 lines / 46 KiB" — not part of the original source)

#include "storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.h"
#include "storm/utility/macros.h"
#include "storm/logic/Formulas.h"
#include "storm/logic/CloneVisitor.h"
#include "storm/storage/memorystructure/MemoryStructureBuilder.h"
#include "storm/storage/memorystructure/SparseModelMemoryProduct.h"
#include "storm/modelchecker/propositional/SparsePropositionalModelChecker.h"
#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
#include "storm/transformer/EndComponentEliminator.h"
#include "storm/exceptions/UnexpectedException.h"
#include "storm/exceptions/IllegalArgumentException.h"
#include "storm/exceptions/NotSupportedException.h"
namespace storm {
namespace modelchecker {
namespace multiobjective {
/*!
 * Constructs the reward unfolding for the given MDP and preprocessed objectives.
 * All derived data structures (model-memory product, epoch steps, memory labels, ...)
 * are set up once in initialize().
 * @param model the MDP under analysis
 * @param objectives the preprocessed (multi-)objectives
 * @param possibleECActions actions that possibly lie on an end component
 * @param allowedBottomStates states in which it is allowed to collect a bottom-state reward
 */
template<typename ValueType>
MultiDimensionalRewardUnfolding<ValueType>::MultiDimensionalRewardUnfolding(storm::models::sparse::Mdp<ValueType> const& model, std::vector<storm::modelchecker::multiobjective::Objective<ValueType>> const& objectives, storm::storage::BitVector const& possibleECActions, storm::storage::BitVector const& allowedBottomStates) : model(model), objectives(objectives), possibleECActions(possibleECActions), allowedBottomStates(allowedBottomStates) {
    initialize();
}
template<typename ValueType>
void MultiDimensionalRewardUnfolding<ValueType>::initialize() {
    // Collect the bounded subobjectives. For each dimension we store the subformula,
    // a fresh memory label, the per-choice epoch steps (discretized reward values),
    // and the scaling factor relating the discretized values to the original bounds.
    std::vector<std::vector<uint64_t>> epochSteps;
    for (uint64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
        auto const& formula = *this->objectives[objIndex].formula;
        if (formula.isProbabilityOperatorFormula()) {
            std::vector<std::shared_ptr<storm::logic::Formula const>> subformulas;
            if (formula.getSubformula().isBoundedUntilFormula()) {
                subformulas.push_back(formula.getSubformula().asSharedPointer());
            } else if (formula.getSubformula().isMultiObjectiveFormula()) {
                subformulas = formula.getSubformula().asMultiObjectiveFormula().getSubformulas();
            } else {
                STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unexpected type of subformula for formula " << formula);
            }
            for (auto const& subformula : subformulas) {
                auto const& boundedUntilFormula = subformula->asBoundedUntilFormula();
                for (uint64_t dim = 0; dim < boundedUntilFormula.getDimension(); ++dim) {
                    subObjectives.push_back(std::make_pair(boundedUntilFormula.restrictToDimension(dim), objIndex));
                    // Find a memory label that does not clash with an existing state label.
                    std::string memLabel = "dim" + std::to_string(subObjectives.size()) + "_maybe";
                    while (model.getStateLabeling().containsLabel(memLabel)) {
                        memLabel = "_" + memLabel;
                    }
                    memoryLabels.push_back(memLabel);
                    if (boundedUntilFormula.getTimeBoundReference(dim).isTimeBound() || boundedUntilFormula.getTimeBoundReference(dim).isStepBound()) {
                        // Time/step bounds: every choice advances this dimension by exactly one.
                        epochSteps.push_back(std::vector<uint64_t>(model.getNumberOfChoices(), 1));
                        scalingFactors.push_back(storm::utility::one<ValueType>());
                    } else {
                        STORM_LOG_ASSERT(boundedUntilFormula.getTimeBoundReference(dim).isRewardBound(), "Unexpected type of time bound.");
                        std::string const& rewardName = boundedUntilFormula.getTimeBoundReference(dim).getRewardName();
                        STORM_LOG_THROW(this->model.hasRewardModel(rewardName), storm::exceptions::IllegalArgumentException, "No reward model with name '" << rewardName << "' found.");
                        auto const& rewardModel = this->model.getRewardModel(rewardName);
                        STORM_LOG_THROW(!rewardModel.hasTransitionRewards(), storm::exceptions::NotSupportedException, "Transition rewards are currently not supported as reward bounds.");
                        std::vector<ValueType> actionRewards = rewardModel.getTotalRewardVector(this->model.getTransitionMatrix());
                        // Discretize the (possibly rational) rewards into integers plus a common scaling factor.
                        auto discretizedRewardsAndFactor = storm::utility::vector::toIntegralVector<ValueType, uint64_t>(actionRewards);
                        epochSteps.push_back(std::move(discretizedRewardsAndFactor.first));
                        scalingFactors.push_back(std::move(discretizedRewardsAndFactor.second));
                    }
                }
            }
        } else if (formula.isRewardOperatorFormula() && formula.getSubformula().isCumulativeRewardFormula()) {
            // Cumulative reward objectives induce a single step-bounded dimension without a memory label.
            subObjectives.push_back(std::make_pair(formula.getSubformula().asSharedPointer(), objIndex));
            epochSteps.push_back(std::vector<uint64_t>(model.getNumberOfChoices(), 1));
            scalingFactors.push_back(storm::utility::one<ValueType>());
            memoryLabels.push_back(boost::none);
        }
    }

    // Compute a mapping for each objective to the set of dimensions it considers.
    for (uint64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
        storm::storage::BitVector dimensions(subObjectives.size(), false);
        for (uint64_t subObjIndex = 0; subObjIndex < subObjectives.size(); ++subObjIndex) {
            if (subObjectives[subObjIndex].second == objIndex) {
                dimensions.set(subObjIndex, true);
            }
        }
        objectiveDimensions.push_back(std::move(dimensions));
    }

    // Collect which epoch steps are possible (one step vector per model choice).
    // Guard against an empty dimension set: epochSteps.front() would be undefined behavior.
    possibleEpochSteps.clear();
    if (!epochSteps.empty()) {
        for (uint64_t choiceIndex = 0; choiceIndex < epochSteps.front().size(); ++choiceIndex) {
            Epoch step;
            step.reserve(epochSteps.size());
            for (auto const& dimensionRewards : epochSteps) {
                step.push_back(dimensionRewards[choiceIndex]);
            }
            possibleEpochSteps.insert(step);
        }
    }

    // Build the model x memory product.
    auto memoryStructure = computeMemoryStructure();
    memoryStateMap = computeMemoryStateMap(memoryStructure);
    productBuilder = std::make_shared<storm::storage::SparseModelMemoryProduct<ValueType>>(memoryStructure.product(model));
    // todo: we only need to build the reachable states + the full model for each memory state encoding that all subObjectives of an objective are irrelevant
    productBuilder->setBuildFullProduct();
    modelMemoryProduct = productBuilder->build()->template as<storm::models::sparse::Mdp<ValueType>>();

    // Transfer the per-choice epoch steps to the product model; all-zero steps stay unset.
    productEpochSteps.resize(modelMemoryProduct->getNumberOfChoices());
    for (uint64_t modelState = 0; modelState < model.getNumberOfStates(); ++modelState) {
        uint64_t numChoices = model.getTransitionMatrix().getRowGroupSize(modelState);
        uint64_t firstChoice = model.getTransitionMatrix().getRowGroupIndices()[modelState];
        for (uint64_t choiceOffset = 0; choiceOffset < numChoices; ++choiceOffset) {
            Epoch step;
            bool isZeroStep = true;
            for (uint64_t dim = 0; dim < epochSteps.size(); ++dim) {
                step.push_back(epochSteps[dim][firstChoice + choiceOffset]);
                isZeroStep = isZeroStep && step.back() == 0;
            }
            if (!isZeroStep) {
                for (uint64_t memState = 0; memState < memoryStateMap.size(); ++memState) {
                    uint64_t productState = getProductState(modelState, memState);
                    uint64_t productChoice = modelMemoryProduct->getTransitionMatrix().getRowGroupIndices()[productState] + choiceOffset;
                    assert(productChoice < modelMemoryProduct->getTransitionMatrix().getRowGroupIndices()[productState + 1]);
                    productEpochSteps[productChoice] = step;
                }
            }
        }
    }

    // Store, for every product state, the model state and memory state it projects to.
    modelStates.resize(modelMemoryProduct->getNumberOfStates());
    memoryStates.resize(modelMemoryProduct->getNumberOfStates());
    for (uint64_t modelState = 0; modelState < model.getNumberOfStates(); ++modelState) {
        for (uint64_t memoryState = 0; memoryState < memoryStructure.getNumberOfStates(); ++memoryState) {
            uint64_t productState = getProductState(modelState, memoryState);
            modelStates[productState] = modelState;
            memoryStates[productState] = memoryState;
        }
    }

    // Map every product choice to the product state it belongs to.
    productChoiceToStateMapping.clear();
    productChoiceToStateMapping.reserve(modelMemoryProduct->getNumberOfChoices());
    for (uint64_t productState = 0; productState < modelMemoryProduct->getNumberOfStates(); ++productState) {
        uint64_t groupSize = modelMemoryProduct->getTransitionMatrix().getRowGroupSize(productState);
        for (uint64_t i = 0; i < groupSize; ++i) {
            productChoiceToStateMapping.push_back(productState);
        }
    }

    // Lift the allowed bottom states to the product.
    // Bugfix: the vector has to start out all-false. The previous all-true
    // initialization made the loop below a no-op and effectively marked *every*
    // product state as an allowed bottom state.
    productAllowedBottomStates = storm::storage::BitVector(modelMemoryProduct->getNumberOfStates(), false);
    for (auto const& modelState : allowedBottomStates) {
        for (uint64_t memoryState = 0; memoryState < memoryStateMap.size(); ++memoryState) {
            productAllowedBottomStates.set(getProductState(modelState, memoryState), true);
        }
    }
}
template<typename ValueType>
typename MultiDimensionalRewardUnfolding<ValueType>::Epoch MultiDimensionalRewardUnfolding<ValueType>::getStartEpoch() {
    // Computes the initial epoch: for every dimension the discretized upper bound
    // of its subformula (floor of bound / scalingFactor).
    Epoch startEpoch;
    for (uint64_t dim = 0; dim < this->subObjectives.size(); ++dim) {
        storm::expressions::Expression bound;
        bool isStrict = false;
        storm::logic::Formula const& dimFormula = *subObjectives[dim].first;
        if (dimFormula.isBoundedUntilFormula()) {
            assert(!dimFormula.asBoundedUntilFormula().isMultiDimensional());
            STORM_LOG_THROW(dimFormula.asBoundedUntilFormula().hasUpperBound() && !dimFormula.asBoundedUntilFormula().hasLowerBound(), storm::exceptions::NotSupportedException, "Until formulas with a lower or no upper bound are not supported.");
            bound = dimFormula.asBoundedUntilFormula().getUpperBound();
            isStrict = dimFormula.asBoundedUntilFormula().isUpperBoundStrict();
        } else if (dimFormula.isCumulativeRewardFormula()) {
            bound = dimFormula.asCumulativeRewardFormula().getBound();
            isStrict = dimFormula.asCumulativeRewardFormula().isBoundStrict();
        }
        STORM_LOG_THROW(!bound.containsVariables(), storm::exceptions::NotSupportedException, "The bound " << bound << " contains undefined constants.");
        // Translate the bound into the discretized reward domain (cf. the scaling
        // factors computed in initialize()).
        ValueType discretizedBound = storm::utility::convertNumber<ValueType>(bound.evaluateAsRational());
        discretizedBound /= scalingFactors[dim];
        // A strict bound that lies exactly on an integer is decreased by one, since
        // that value itself is excluded by the strict comparison.
        if (isStrict && discretizedBound == storm::utility::floor(discretizedBound)) {
            discretizedBound = storm::utility::floor(discretizedBound) - storm::utility::one<ValueType>();
        } else {
            discretizedBound = storm::utility::floor(discretizedBound);
        }
        startEpoch.push_back(storm::utility::convertNumber<uint64_t>(discretizedBound));
    }
    return startEpoch;
}
template<typename ValueType>
std::vector<typename MultiDimensionalRewardUnfolding<ValueType>::Epoch> MultiDimensionalRewardUnfolding<ValueType>::getEpochComputationOrder(Epoch const& startEpoch) {
    // Depth-first exploration of all epochs reachable from startEpoch via the
    // possible epoch steps. An epoch is appended to the output only once all of
    // its successor epochs have been emitted, yielding a valid computation order.
    std::vector<Epoch> order;
    std::vector<Epoch> stack;
    std::set<Epoch> visited;
    visited.insert(startEpoch);
    stack.push_back(startEpoch);
    while (!stack.empty()) {
        bool expanded = false;
        for (auto const& step : possibleEpochSteps) {
            // Note: stack.back() is re-evaluated each iteration, so newly pushed
            // epochs are expanded immediately (matching the original DFS behavior).
            Epoch successor = getSuccessorEpoch(stack.back(), step);
            if (visited.insert(successor).second) {
                stack.push_back(std::move(successor));
                expanded = true;
            }
        }
        if (!expanded) {
            order.push_back(std::move(stack.back()));
            stack.pop_back();
        }
    }
    return order;
}
template<typename ValueType>
typename MultiDimensionalRewardUnfolding<ValueType>::EpochModel const& MultiDimensionalRewardUnfolding<ValueType>::setCurrentEpoch(Epoch const& epoch) {
    // Check if we need to update the current epoch class
    if (!currentEpoch || getClassOfEpoch(epoch) != getClassOfEpoch(currentEpoch.get())) {
        setCurrentEpochClass(epoch);
    }
    // Find out which objective rewards are earned in this particular epoch:
    // a step choice does NOT earn the reward of an objective if it exceeds the
    // bound (successor epoch entry < 0) of a dimension that is still 'maybe'
    // in the choice's memory state.
    epochModel.objectiveRewardFilter = std::vector<storm::storage::BitVector>(objectives.size(), storm::storage::BitVector(epochModel.objectiveRewards.front().size(), true));
    for (auto const& reducedChoice : epochModel.stepChoices) {
        uint64_t productChoice = ecElimResult.newToOldRowMapping[reducedChoice];
        storm::storage::BitVector memoryState = convertMemoryState(getMemoryState(productChoiceToStateMapping[productChoice]));
        Epoch successorEpoch = getSuccessorEpoch(epoch, productEpochSteps[productChoice].get());
        for (uint64_t dim = 0; dim < successorEpoch.size(); ++dim) {
            if (successorEpoch[dim] < 0 && memoryState.get(dim)) {
                epochModel.objectiveRewardFilter[subObjectives[dim].second].set(reducedChoice, false);
            }
        }
    }
    // compute the solution for the stepChoices, i.e., combine the (previously
    // stored) solutions of the successor epochs, weighted by transition probability.
    epochModel.stepSolutions.resize(epochModel.stepChoices.getNumberOfSetBits());
    auto stepSolIt = epochModel.stepSolutions.begin();
    for (auto const& reducedChoice : epochModel.stepChoices) {
        uint64_t productChoice = ecElimResult.newToOldRowMapping[reducedChoice];
        SolutionType choiceSolution = getZeroSolution();
        Epoch successorEpoch = getSuccessorEpoch(epoch, productEpochSteps[productChoice].get());
        // Dimensions whose bound is exceeded in the successor epoch are masked out:
        // every dimension of an affected objective becomes irrelevant.
        storm::storage::BitVector relevantDimensions(successorEpoch.size(), true);
        for (uint64_t dim = 0; dim < successorEpoch.size(); ++dim) {
            if (successorEpoch[dim] < 0) {
                relevantDimensions &= ~objectiveDimensions[subObjectives[dim].second];
            }
        }
        // Accumulate the scaled solutions of the successor states, where the
        // successor's memory state is projected to the still-relevant dimensions.
        for (auto const& successor : modelMemoryProduct->getTransitionMatrix().getRow(productChoice)) {
            storm::storage::BitVector successorMemoryState = convertMemoryState(getMemoryState(successor.getColumn())) & relevantDimensions;
            uint64_t successorProductState = getProductState(getModelState(successor.getColumn()), convertMemoryState(successorMemoryState));
            SolutionType const& successorSolution = getStateSolution(successorEpoch, successorProductState);
            addScaledSolution(choiceSolution, successorSolution, successor.getValue());
        }
        *stepSolIt = std::move(choiceSolution);
        ++stepSolIt;
    }
    // Sanity checks for the consistency of the epoch model's components.
    assert(epochModel.objectiveRewards.size() == objectives.size());
    assert(epochModel.objectiveRewardFilter.size() == objectives.size());
    assert(epochModel.epochMatrix.getRowCount() == epochModel.stepChoices.size());
    assert(epochModel.stepChoices.size() == epochModel.objectiveRewards.front().size());
    assert(epochModel.objectiveRewards.front().size() == epochModel.objectiveRewards.back().size());
    assert(epochModel.objectiveRewards.front().size() == epochModel.objectiveRewardFilter.front().size());
    assert(epochModel.objectiveRewards.back().size() == epochModel.objectiveRewardFilter.back().size());
    assert(epochModel.stepChoices.getNumberOfSetBits() == epochModel.stepSolutions.size());
    currentEpoch = epoch;
    return epochModel;
}
template<typename ValueType>
void MultiDimensionalRewardUnfolding<ValueType>::setCurrentEpochClass(Epoch const& epoch) {
    // Rebuilds the epoch model for the class of the given epoch (the class only
    // depends on which entries are negative, cf. getClassOfEpoch).
    auto productObjectiveRewards = computeObjectiveRewardsForProduct(epoch);
    // A 'step choice' leaves the current epoch: it has a non-zero step in some
    // dimension whose epoch entry is still non-negative.
    storm::storage::BitVector stepChoices(modelMemoryProduct->getNumberOfChoices(), false);
    uint64_t choice = 0;
    for (auto const& step : productEpochSteps) {
        if (step) {
            auto eIt = epoch.begin();
            for (auto const& s : step.get()) {
                if (s != 0 && *eIt >= 0) {
                    stepChoices.set(choice, true);
                    break;
                }
                ++eIt;
            }
        }
        ++choice;
    }
    // The epoch matrix only keeps the transitions that stay within the epoch.
    epochModel.epochMatrix = modelMemoryProduct->getTransitionMatrix().filterEntries(~stepChoices);
    // Eliminate end components that consist of choices with zero reward for every
    // objective (restricted to choices that do not leave the epoch).
    storm::storage::BitVector zeroObjRewardChoices(modelMemoryProduct->getNumberOfChoices(), true);
    for (auto const& objRewards : productObjectiveRewards) {
        zeroObjRewardChoices &= storm::utility::vector::filterZero(objRewards);
    }
    ecElimResult = storm::transformer::EndComponentEliminator<ValueType>::transform(epochModel.epochMatrix, storm::storage::BitVector(modelMemoryProduct->getNumberOfStates(), true), zeroObjRewardChoices & ~stepChoices, productAllowedBottomStates);
    epochModel.epochMatrix = std::move(ecElimResult.matrix);
    // Translate the step choices to the rows of the EC-reduced matrix.
    // Note: the loop variable deliberately shadows the outer 'choice'.
    epochModel.stepChoices = storm::storage::BitVector(epochModel.epochMatrix.getRowCount(), false);
    for (uint64_t choice = 0; choice < epochModel.epochMatrix.getRowCount(); ++choice) {
        if (stepChoices.get(ecElimResult.newToOldRowMapping[choice])) {
            epochModel.stepChoices.set(choice, true);
        }
    }
    STORM_LOG_ASSERT(epochModel.stepChoices.getNumberOfSetBits() == stepChoices.getNumberOfSetBits(), "The number of choices leading outside of the epoch does not match for the reduced and unreduced epochMatrix");
    // Translate the objective rewards to the rows of the EC-reduced matrix.
    epochModel.objectiveRewards.clear();
    for (auto const& productObjRew : productObjectiveRewards) {
        std::vector<ValueType> reducedModelObjRewards;
        reducedModelObjRewards.reserve(epochModel.epochMatrix.getRowCount());
        for (auto const& productChoice : ecElimResult.newToOldRowMapping) {
            reducedModelObjRewards.push_back(productObjRew[productChoice]);
        }
        epochModel.objectiveRewards.push_back(std::move(reducedModelObjRewards));
    }
}
template<typename ValueType>
typename MultiDimensionalRewardUnfolding<ValueType>::SolutionType MultiDimensionalRewardUnfolding<ValueType>::getZeroSolution() const {
    // Builds a solution whose weighted value and all per-objective values are zero.
    SolutionType zero;
    zero.objectiveValues.assign(objectives.size(), storm::utility::zero<ValueType>());
    zero.weightedValue = storm::utility::zero<ValueType>();
    return zero;
}
template<typename ValueType>
void MultiDimensionalRewardUnfolding<ValueType>::addScaledSolution(SolutionType& solution, SolutionType const& solutionToAdd, ValueType const& scalingFactor) const {
    // Accumulates solution += scalingFactor * solutionToAdd (component-wise).
    storm::utility::vector::addScaledVector(solution.objectiveValues, solutionToAdd.objectiveValues, scalingFactor);
    solution.weightedValue += scalingFactor * solutionToAdd.weightedValue;
}
template<typename ValueType>
void MultiDimensionalRewardUnfolding<ValueType>::setSolutionForCurrentEpoch(std::vector<SolutionType> const& reducedModelStateSolutions) {
    // Transfers the solutions computed on the EC-reduced model back to the states
    // of the full model-memory product.
    uint64_t const numProductStates = modelMemoryProduct->getNumberOfStates();
    for (uint64_t productState = 0; productState < numProductStates; ++productState) {
        uint64_t const reducedState = ecElimResult.oldToNewStateMapping[productState];
        // Product states whose mapping points outside of the solution vector are skipped
        // (presumably states without a counterpart in the reduced model).
        if (reducedState < reducedModelStateSolutions.size()) {
            setSolutionForCurrentEpoch(productState, reducedModelStateSolutions[reducedState]);
        }
    }
}
template<typename ValueType>
void MultiDimensionalRewardUnfolding<ValueType>::setSolutionForCurrentEpoch(uint64_t const& productState, SolutionType const& solution) {
    STORM_LOG_ASSERT(currentEpoch, "Tried to set a solution for the current epoch, but no epoch was specified before.");
    // The lookup key is the current epoch extended by the product state index.
    std::vector<int64_t> key(currentEpoch.get());
    key.push_back(productState);
    solutions[key] = solution;
}
template<typename ValueType>
typename MultiDimensionalRewardUnfolding<ValueType>::SolutionType const& MultiDimensionalRewardUnfolding<ValueType>::getStateSolution(Epoch const& epoch, uint64_t const& productState) const {
    // Looks up the stored solution for the given epoch / product state combination.
    std::vector<int64_t> key(epoch);
    key.push_back(productState);
    auto found = solutions.find(key);
    STORM_LOG_ASSERT(found != solutions.end(), "Requested unexisting solution for epoch " << storm::utility::vector::toString(epoch) << ".");
    return found->second;
}
template<typename ValueType>
typename MultiDimensionalRewardUnfolding<ValueType>::SolutionType const& MultiDimensionalRewardUnfolding<ValueType>::getInitialStateResult(Epoch const& epoch) const {
    // Returns the stored solution of the product's initial state for the given epoch.
    // Note: only the first initial state is considered (assumes a unique initial state).
    return getStateSolution(epoch, *modelMemoryProduct->getInitialStates().begin());
}
template<typename ValueType>
storm::storage::MemoryStructure MultiDimensionalRewardUnfolding<ValueType>::computeMemoryStructure() const {
    storm::modelchecker::SparsePropositionalModelChecker<storm::models::sparse::Mdp<ValueType>> mc(model);
    // Create a memory structure that remembers whether (sub)objectives are satisfied
    storm::storage::MemoryStructure memory = storm::storage::MemoryStructureBuilder<ValueType>::buildTrivialMemoryStructure(model);
    for (uint64_t objIndex = 0; objIndex < objectives.size(); ++objIndex) {
        // Only probability operator objectives get a non-trivial memory component.
        if (!objectives[objIndex].formula->isProbabilityOperatorFormula()) {
            continue;
        }
        // Map the local (per-objective) dimension indices to the global ones.
        std::vector<uint64_t> dimensionIndexMap;
        for (auto const& globalDimensionIndex : objectiveDimensions[objIndex]) {
            dimensionIndexMap.push_back(globalDimensionIndex);
        }
        // collect the memory states for this objective: one BitVector per subset of
        // this objective's dimensions; a set bit means the corresponding subobjective
        // is still undecided ('maybe').
        std::vector<storm::storage::BitVector> objMemStates;
        storm::storage::BitVector m(dimensionIndexMap.size(), false);
        for (; !m.full(); m.increment()) {
            objMemStates.push_back(~m);
        }
        // The final push adds ~full, i.e., the empty set (all subobjectives decided).
        objMemStates.push_back(~m);
        assert(objMemStates.size() == 1ull << dimensionIndexMap.size());
        // build objective memory
        auto objMemoryBuilder = storm::storage::MemoryStructureBuilder<ValueType>(objMemStates.size(), model);
        // Get the set of states that for all subobjectives satisfy either the left or the right subformula
        storm::storage::BitVector constraintStates(model.getNumberOfStates(), true);
        for (auto const& dim : objectiveDimensions[objIndex]) {
            auto const& subObj = subObjectives[dim];
            STORM_LOG_ASSERT(subObj.first->isBoundedUntilFormula(), "Unexpected Formula type");
            constraintStates &=
                (mc.check(subObj.first->asBoundedUntilFormula().getLeftSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector() |
                 mc.check(subObj.first->asBoundedUntilFormula().getRightSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector());
        }
        // Build the transitions between the memory states.
        // Memory can only 'lose' undecided dimensions, i.e., the successor's set of
        // maybe-dimensions must be a subset of the current one.
        for (uint64_t memState = 0; memState < objMemStates.size(); ++memState) {
            auto const& memStateBV = objMemStates[memState];
            for (uint64_t memStatePrime = 0; memStatePrime < objMemStates.size(); ++memStatePrime) {
                auto const& memStatePrimeBV = objMemStates[memStatePrime];
                if (memStatePrimeBV.isSubsetOf(memStateBV)) {
                    // The transition formula characterizes the model states in which
                    // exactly the dimensions in memStateBV \ memStatePrimeBV become
                    // decided, i.e., their right subformula holds (and is negated for
                    // dimensions that remain undecided).
                    std::shared_ptr<storm::logic::Formula const> transitionFormula = storm::logic::Formula::getTrueFormula();
                    for (auto const& subObjIndex : memStateBV) {
                        std::shared_ptr<storm::logic::Formula const> subObjFormula = subObjectives[dimensionIndexMap[subObjIndex]].first->asBoundedUntilFormula().getRightSubformula().asSharedPointer();
                        if (memStatePrimeBV.get(subObjIndex)) {
                            subObjFormula = std::make_shared<storm::logic::UnaryBooleanStateFormula>(storm::logic::UnaryBooleanStateFormula::OperatorType::Not, subObjFormula);
                        }
                        transitionFormula = std::make_shared<storm::logic::BinaryBooleanStateFormula>(storm::logic::BinaryBooleanStateFormula::OperatorType::And, transitionFormula, subObjFormula);
                    }
                    storm::storage::BitVector transitionStates = mc.check(*transitionFormula)->asExplicitQualitativeCheckResult().getTruthValuesVector();
                    if (memStatePrimeBV.empty()) {
                        // Leaving the constraint states also leads to the fully-decided memory state.
                        transitionStates |= ~constraintStates;
                    } else {
                        transitionStates &= constraintStates;
                    }
                    objMemoryBuilder.setTransition(memState, memStatePrime, transitionStates);
                    // Set the initial states
                    if (memStateBV.full()) {
                        storm::storage::BitVector initialTransitionStates = model.getInitialStates() & transitionStates;
                        // At this point we can check whether there is an initial state that already satisfies all subObjectives.
                        // Such a situation is not supported as we can not reduce this (easily) to an expected reward computation.
                        STORM_LOG_THROW(!memStatePrimeBV.empty() || initialTransitionStates.empty() || initialTransitionStates.isDisjointFrom(constraintStates), storm::exceptions::NotSupportedException, "The objective " << *objectives[objIndex].formula << " is already satisfied in an initial state. This special case is not supported.");
                        for (auto const& initState : initialTransitionStates) {
                            objMemoryBuilder.setInitialMemoryState(initState, memStatePrime);
                        }
                    }
                }
            }
        }
        // Build the memory labels: a memory state carries the 'maybe' label of every
        // dimension that is still undecided in it.
        for (uint64_t memState = 0; memState < objMemStates.size(); ++memState) {
            auto const& memStateBV = objMemStates[memState];
            for (auto const& subObjIndex : memStateBV) {
                objMemoryBuilder.setLabel(memState, memoryLabels[dimensionIndexMap[subObjIndex]].get());
            }
        }
        /* Old (wrong.. ) implementation
        storm::storage::MemoryStructure objMemory = storm::storage::MemoryStructureBuilder<ValueType>::buildTrivialMemoryStructure(model);
        for (auto const& dim : objectiveDimensions[objIndex]) {
            auto const& subObj = subObjectives[dim];
            if (subObj.first->isBoundedUntilFormula()) {
                // Create a memory structure that stores whether a PsiState has already been reached
                storm::storage::MemoryStructureBuilder<ValueType> subObjMemBuilder(2, model);
                subObjMemBuilder.setLabel(0, memoryLabels[dim].get());
                storm::storage::BitVector leftSubformulaResult = mc.check(subObj.first->asBoundedUntilFormula().getLeftSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector();
                storm::storage::BitVector rightSubformulaResult = mc.check(subObj.first->asBoundedUntilFormula().getRightSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector();
                subObjMemBuilder.setTransition(0, 0, leftSubformulaResult & ~rightSubformulaResult);
                subObjMemBuilder.setTransition(0, 1, rightSubformulaResult);
                subObjMemBuilder.setTransition(1, 1, storm::storage::BitVector(model.getNumberOfStates(), true));
                for (auto const& initState : model.getInitialStates()) {
                    subObjMemBuilder.setInitialMemoryState(initState, rightSubformulaResult.get(initState) ? 1 : 0);
                }
                storm::storage::MemoryStructure subObjMem = subObjMemBuilder.build();
                objMemory = objMemory.product(subObjMem);
            }
        }
        // find the memory state that represents that all subObjectives are decided (i.e., Psi_i has been reached for all i)
        storm::storage::BitVector decidedState(objMemory.getNumberOfStates(), true);
        for (auto const& dim : objectiveDimensions[objIndex]) {
            decidedState = decidedState & ~objMemory.getStateLabeling().getStates(memoryLabels[dim].get());
        }
        assert(decidedState.getNumberOfSetBits() == 1);
        // When the set of PhiStates is left for at least one until formula, we switch to the decidedState
        storm::storage::MemoryStructureBuilder<ValueType> objMemBuilder(objMemory, model);
        for (auto const& dim : objectiveDimensions[objIndex]) {
            auto const& subObj = subObjectives[dim];
            storm::storage::BitVector constraintModelStates =
                mc.check(subObj.first->asBoundedUntilFormula().getLeftSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector() |
                mc.check(subObj.first->asBoundedUntilFormula().getRightSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector();
            for (auto const& maybeState : objMemory.getStateLabeling().getStates(memoryLabels[dim].get())) {
                objMemBuilder.setTransition(maybeState, *decidedState.begin(), ~constraintModelStates);
            }
        }
        */
        auto objMemory = objMemoryBuilder.build();
        // Combine this objective's memory with the memory of the previous objectives.
        memory = memory.product(objMemory);
    }
    return memory;
}
template<typename ValueType>
std::vector<storm::storage::BitVector> MultiDimensionalRewardUnfolding<ValueType>::computeMemoryStateMap(storm::storage::MemoryStructure const& memory) const {
    // For every memory state, compute the set of dimensions whose memory label
    // (cf. memoryLabels) is present at that state.
    std::vector<storm::storage::BitVector> map;
    map.reserve(memory.getNumberOfStates());
    for (uint64_t memState = 0; memState < memory.getNumberOfStates(); ++memState) {
        storm::storage::BitVector dims(subObjectives.size(), false);
        std::set<std::string> const labels = memory.getStateLabeling().getLabelsOfState(memState);
        for (uint64_t dim = 0; dim < subObjectives.size(); ++dim) {
            // Dimensions without a memory label (boost::none) are never set.
            if (memoryLabels[dim] && labels.count(memoryLabels[dim].get()) > 0) {
                dims.set(dim, true);
            }
        }
        map.push_back(std::move(dims));
    }
    return map;
}
template<typename ValueType>
storm::storage::BitVector const& MultiDimensionalRewardUnfolding<ValueType>::convertMemoryState(uint64_t const& memoryState) const {
    // Translates a memory state index to its set of dimensions (cf. computeMemoryStateMap).
    return memoryStateMap[memoryState];
}
template<typename ValueType>
uint64_t MultiDimensionalRewardUnfolding<ValueType>::convertMemoryState(storm::storage::BitVector const& memoryState) const {
    // Reverse lookup: find the memory state index whose dimension set equals the
    // given BitVector (cf. computeMemoryStateMap).
    auto memStateIt = std::find(memoryStateMap.begin(), memoryStateMap.end(), memoryState);
    // Previously, an unknown memory state silently yielded memoryStateMap.size(),
    // an out-of-range index. Fail fast in debug builds instead.
    STORM_LOG_ASSERT(memStateIt != memoryStateMap.end(), "No memory state corresponds to the given set of dimensions.");
    return memStateIt - memoryStateMap.begin();
}
template<typename ValueType>
uint64_t MultiDimensionalRewardUnfolding<ValueType>::getProductState(uint64_t const& modelState, uint64_t const& memoryState) const {
    // Looks up the product state for the given (model state, memory state) pair.
    uint64_t productState = productBuilder->getResultState(modelState, memoryState);
    STORM_LOG_ASSERT(productState < modelMemoryProduct->getNumberOfStates(), "There is no state in the model-memory-product that corresponds to model state " << modelState << " and memory state " << memoryState << ".");
    return productState;
}
template<typename ValueType>
uint64_t MultiDimensionalRewardUnfolding<ValueType>::getModelState(uint64_t const& productState) const {
    // Returns the model state the given product state projects to (cf. initialize()).
    return modelStates[productState];
}
template<typename ValueType>
uint64_t MultiDimensionalRewardUnfolding<ValueType>::getMemoryState(uint64_t const& productState) const {
    // Returns the memory state the given product state projects to (cf. initialize()).
    return memoryStates[productState];
}
template<typename ValueType>
std::vector<std::vector<ValueType>> MultiDimensionalRewardUnfolding<ValueType>::computeObjectiveRewardsForProduct(Epoch const& epoch) const {
    // Computes, for every objective, the reward each product-model choice earns
    // within the given epoch (one vector of choice rewards per objective).
    std::vector<std::vector<ValueType>> objectiveRewards;
    objectiveRewards.reserve(objectives.size());
    for (uint64_t objIndex = 0; objIndex < objectives.size(); ++objIndex) {
        auto const& formula = *this->objectives[objIndex].formula;
        if (formula.isProbabilityOperatorFormula()) {
            storm::modelchecker::SparsePropositionalModelChecker<storm::models::sparse::Mdp<ValueType>> mc(*modelMemoryProduct);
            // Map the local (per-objective) dimension indices to the global ones.
            std::vector<uint64_t> dimensionIndexMap;
            for (auto const& globalDimensionIndex : objectiveDimensions[objIndex]) {
                dimensionIndexMap.push_back(globalDimensionIndex);
            }
            // sinkStatesFormula characterizes the states where no dimension of this
            // objective carries its 'maybe' memory label anymore.
            std::shared_ptr<storm::logic::Formula const> sinkStatesFormula;
            for (auto const& dim : objectiveDimensions[objIndex]) {
                auto memLabelFormula = std::make_shared<storm::logic::AtomicLabelFormula>(memoryLabels[dim].get());
                if (sinkStatesFormula) {
                    sinkStatesFormula = std::make_shared<storm::logic::BinaryBooleanStateFormula>(storm::logic::BinaryBooleanStateFormula::OperatorType::Or, sinkStatesFormula, memLabelFormula);
                } else {
                    sinkStatesFormula = memLabelFormula;
                }
            }
            sinkStatesFormula = std::make_shared<storm::logic::UnaryBooleanStateFormula>(storm::logic::UnaryBooleanStateFormula::OperatorType::Not, sinkStatesFormula);
            std::vector<ValueType> objRew(modelMemoryProduct->getTransitionMatrix().getRowCount(), storm::utility::zero<ValueType>());
            // Enumerate all non-empty subsets of this objective's dimensions
            // (increment walks through 1 .. 2^n - 1).
            storm::storage::BitVector relevantObjectives(objectiveDimensions[objIndex].getNumberOfSetBits());
            while (!relevantObjectives.full()) {
                relevantObjectives.increment();
                // find out whether objective reward should be earned within this epoch:
                // only if none of the chosen dimensions has exceeded its bound yet.
                bool collectRewardInEpoch = true;
                for (auto const& subObjIndex : relevantObjectives) {
                    if (epoch[dimensionIndexMap[subObjIndex]] < 0) {
                        collectRewardInEpoch = false;
                        break;
                    }
                }
                if (collectRewardInEpoch) {
                    // relevantStatesFormula: states whose memory marks exactly the chosen
                    // subset of dimensions as 'maybe'.
                    // goalStatesFormula: sink states that additionally satisfy the right
                    // subformula of every chosen dimension.
                    std::shared_ptr<storm::logic::Formula const> relevantStatesFormula;
                    std::shared_ptr<storm::logic::Formula const> goalStatesFormula = storm::logic::CloneVisitor().clone(*sinkStatesFormula);
                    for (uint64_t subObjIndex = 0; subObjIndex < dimensionIndexMap.size(); ++subObjIndex) {
                        std::shared_ptr<storm::logic::Formula> memLabelFormula = std::make_shared<storm::logic::AtomicLabelFormula>(memoryLabels[dimensionIndexMap[subObjIndex]].get());
                        if (relevantObjectives.get(subObjIndex)) {
                            auto rightSubFormula = subObjectives[dimensionIndexMap[subObjIndex]].first->asBoundedUntilFormula().getRightSubformula().asSharedPointer();
                            goalStatesFormula = std::make_shared<storm::logic::BinaryBooleanStateFormula>(storm::logic::BinaryBooleanStateFormula::OperatorType::And, goalStatesFormula, rightSubFormula);
                        } else {
                            memLabelFormula = std::make_shared<storm::logic::UnaryBooleanStateFormula>(storm::logic::UnaryBooleanStateFormula::OperatorType::Not, memLabelFormula);
                        }
                        if (relevantStatesFormula) {
                            relevantStatesFormula = std::make_shared<storm::logic::BinaryBooleanStateFormula>(storm::logic::BinaryBooleanStateFormula::OperatorType::And, relevantStatesFormula, memLabelFormula);
                        } else {
                            relevantStatesFormula = memLabelFormula;
                        }
                    }
                    storm::storage::BitVector relevantStates = mc.check(*relevantStatesFormula)->asExplicitQualitativeCheckResult().getTruthValuesVector();
                    storm::storage::BitVector relevantChoices = modelMemoryProduct->getTransitionMatrix().getRowFilter(relevantStates);
                    storm::storage::BitVector goalStates = mc.check(*goalStatesFormula)->asExplicitQualitativeCheckResult().getTruthValuesVector();
                    // The earned reward of a choice is its accumulated transition mass
                    // into the goal states.
                    for (auto const& choice : relevantChoices) {
                        objRew[choice] += modelMemoryProduct->getTransitionMatrix().getConstrainedRowSum(choice, goalStates);
                    }
                }
            }
            objectiveRewards.push_back(std::move(objRew));
        } else if (formula.isRewardOperatorFormula()) {
            auto const& rewModel = modelMemoryProduct->getRewardModel(formula.asRewardOperatorFormula().getRewardModelName());
            STORM_LOG_THROW(!rewModel.hasTransitionRewards(), storm::exceptions::NotSupportedException, "Reward model has transition rewards which is not expected.");
            bool rewardCollectedInEpoch = true;
            if (formula.getSubformula().isCumulativeRewardFormula()) {
                assert(objectiveDimensions[objIndex].getNumberOfSetBits() == 1);
                // Cumulative rewards are only collected while the bound is not yet exceeded.
                rewardCollectedInEpoch = epoch[*objectiveDimensions[objIndex].begin()] >= 0;
            } else {
                STORM_LOG_THROW(formula.getSubformula().isTotalRewardFormula(), storm::exceptions::UnexpectedException, "Unexpected type of formula " << formula);
            }
            if (rewardCollectedInEpoch) {
                objectiveRewards.push_back(rewModel.getTotalRewardVector(modelMemoryProduct->getTransitionMatrix()));
            } else {
                objectiveRewards.emplace_back(modelMemoryProduct->getTransitionMatrix().getRowCount(), storm::utility::zero<ValueType>());
            }
        } else {
            STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "Unexpected type of formula " << formula);
        }
    }
    return objectiveRewards;
}
template<typename ValueType>
typename MultiDimensionalRewardUnfolding<ValueType>::EpochClass MultiDimensionalRewardUnfolding<ValueType>::getClassOfEpoch(Epoch const& epoch) const {
    // Encode the epoch class as an integer: bit i is set iff epoch[i] is non-negative,
    // i.e., iff the bound of dimension i has not been exceeded yet.
    storm::storage::BitVector signPattern(epoch.size(), false);
    for (uint64_t dim = 0; dim < epoch.size(); ++dim) {
        if (epoch[dim] >= 0) {
            signPattern.set(dim, true);
        }
    }
    return signPattern.getAsInt(0, epoch.size());
}
template<typename ValueType>
typename MultiDimensionalRewardUnfolding<ValueType>::Epoch MultiDimensionalRewardUnfolding<ValueType>::getSuccessorEpoch(Epoch const& epoch, Epoch const& step) const {
    assert(epoch.size() == step.size());
    // Subtract the step component-wise; entries saturate at -1 ("bound exceeded").
    Epoch successor;
    successor.reserve(epoch.size());
    for (uint64_t dim = 0; dim < epoch.size(); ++dim) {
        int64_t const remaining = epoch[dim] - step[dim];
        successor.push_back(remaining < -1 ? -1 : remaining);
    }
    return successor;
}
// Explicit template instantiations for the supported value types.
template class MultiDimensionalRewardUnfolding<double>;
template class MultiDimensionalRewardUnfolding<storm::RationalNumber>;
}
}
}