From fffb9207d7394c8fb874f95abf4e09ee048a4edd Mon Sep 17 00:00:00 2001 From: TimQu Date: Tue, 25 Jul 2017 15:36:17 +0200 Subject: [PATCH] implemented basic functionality of the multi-dimensional reward unfolding --- .../MultiDimensionalRewardUnfolding.cpp | 255 +++++++++++++----- .../MultiDimensionalRewardUnfolding.h | 11 +- 2 files changed, 203 insertions(+), 63 deletions(-) diff --git a/src/storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.cpp b/src/storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.cpp index 04b09c5c1..9a85e6e64 100644 --- a/src/storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.cpp +++ b/src/storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.cpp @@ -1,6 +1,13 @@ #include "storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.h" +#include "storm/utility/macros.h" +#include "storm/logic/Formulas.h" +#include "storm/storage/memorystructure/MemoryStructureBuilder.h" +#include "storm/modelchecker/propositional/SparsePropositionalModelChecker.h" +#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" +#include "storm/exceptions/IllegalArgumentException.h" +#include "storm/exceptions/NotSupportedException.h" namespace storm { namespace modelchecker { namespace multiobjective { @@ -8,97 +15,223 @@ namespace storm { template MultiDimensionalRewardUnfolding::MultiDimensionalRewardUnfolding(storm::models::sparse::Mdp const& model, std::vector> const& objectives, storm::storage::BitVector const& possibleECActions, storm::storage::BitVector const& allowedBottomStates) : model(model), objectives(objectives), possibleECActions(possibleECActions), allowedBottomStates(allowedBottomStates) { - - + initialize(); } + + template + void MultiDimensionalRewardUnfolding::initialize() { + + // collect the time-bounded subobjectives + for (uint64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) { + auto const& formula = *this->objectives[objIndex].formula; + if (formula.isProbabilityOperatorFormula()) { + std::vector> subformulas; + if (formula.getSubformula().isBoundedUntilFormula()) { + subformulas.push_back(formula.getSubformula().asSharedPointer()); + } else if (formula.getSubformula().isMultiObjectiveFormula()) { + subformulas = formula.getSubformula().asMultiObjectiveFormula().getSubformulas(); + } + for (auto const& subformula : subformulas) { + auto const& boundedUntilFormula = subformula->asBoundedUntilFormula(); + for (uint64_t dim = 0; dim < boundedUntilFormula.getDimension(); ++dim) { + subObjectives.push_back(std::make_pair(boundedUntilFormula.restrictToDimension(dim), objIndex)); + if (boundedUntilFormula.getTimeBoundReference(dim).isTimeBound() || boundedUntilFormula.getTimeBoundReference(dim).isStepBound()) { + scaledRewards.push_back(std::vector(model.getNumberOfChoices(), 1)); + scalingFactors.push_back(storm::utility::one()); + } else { + STORM_LOG_ASSERT(boundedUntilFormula.getTimeBoundReference(dim).isRewardBound(), "Unexpected type of time bound."); + std::string const& rewardName = boundedUntilFormula.getTimeBoundReference(dim).getRewardName(); + STORM_LOG_THROW(this->model.hasRewardModel(rewardName), storm::exceptions::IllegalArgumentException, "No reward model with name '" << rewardName << "' found."); + auto const& rewardModel = this->model.getRewardModel(rewardName); + STORM_LOG_THROW(!rewardModel.hasTransitionRewards(), storm::exceptions::NotSupportedException, "Transition rewards are currently not supported as reward bounds."); + std::vector actionRewards = rewardModel.getTotalRewardVector(this->model.getTransitionMatrix()); + auto discretizedRewardsAndFactor = storm::utility::vector::toIntegralVector(actionRewards); + scaledRewards.push_back(std::move(discretizedRewardsAndFactor.first)); + scalingFactors.push_back(std::move(discretizedRewardsAndFactor.second)); + } + } + } + } else if (formula.isRewardOperatorFormula() && formula.getSubformula().isCumulativeRewardFormula()) { + subObjectives.push_back(std::make_pair(formula.getSubformula().asSharedPointer(), objIndex)); + scaledRewards.push_back(std::vector(model.getNumberOfChoices(), 1)); + scalingFactors.push_back(storm::utility::one()); + } + } + } + template typename MultiDimensionalRewardUnfolding::Epoch MultiDimensionalRewardUnfolding::getStartEpoch() { - return Epoch(); + Epoch startEpoch; + for (uint64_t dim = 0; dim < this->subObjectives.size(); ++dim) { + storm::expressions::Expression bound; + bool isStrict = false; + storm::logic::Formula const& dimFormula = *subObjectives[dim].first; + if (dimFormula.isBoundedUntilFormula()) { + assert(!dimFormula.asBoundedUntilFormula().isMultiDimensional()); + STORM_LOG_THROW(dimFormula.asBoundedUntilFormula().hasUpperBound() && !dimFormula.asBoundedUntilFormula().hasLowerBound(), storm::exceptions::NotSupportedException, "Until formulas with a lower or no upper bound are not supported."); + bound = dimFormula.asBoundedUntilFormula().getUpperBound(); + isStrict = dimFormula.asBoundedUntilFormula().isUpperBoundStrict(); + } else if (dimFormula.isCumulativeRewardFormula()) { + bound = dimFormula.asCumulativeRewardFormula().getBound(); + isStrict = dimFormula.asCumulativeRewardFormula().isBoundStrict(); + } + STORM_LOG_THROW(!bound.containsVariables(), storm::exceptions::NotSupportedException, "The bound " << bound << " contains undefined constants."); + ValueType discretizedBound = storm::utility::convertNumber(bound.evaluateAsRational()); + discretizedBound /= scalingFactors[dim]; + if (isStrict && discretizedBound == storm::utility::floor(discretizedBound)) { + discretizedBound = storm::utility::floor(discretizedBound) - storm::utility::one(); + } else { + discretizedBound = storm::utility::floor(discretizedBound); + } + startEpoch.push_back(storm::utility::convertNumber(discretizedBound)); + + } + return startEpoch; } template std::vector::Epoch> MultiDimensionalRewardUnfolding::getEpochComputationOrder(Epoch const& startEpoch) { - return std::vector(); + // collect which 'epoch steps' are possible + std::set steps; + for (uint64_t choiceIndex = 0; choiceIndex < scaledRewards.front().size(); ++choiceIndex) { + Epoch step; + step.reserve(scaledRewards.size()); + for (auto const& dimensionRewards : scaledRewards) { + step.push_back(dimensionRewards[choiceIndex]); + } + steps.insert(step); + } + + // perform DFS to get the 'reachable' epochs in the correct order. + std::vector result, dfsStack; + std::set seenEpochs; + seenEpochs.insert(startEpoch); + dfsStack.push_back(startEpoch); + while (!dfsStack.empty()) { + Epoch currEpoch = std::move(dfsStack.back()); + dfsStack.pop_back(); + for (auto const& step : steps) { + Epoch successorEpoch = getSuccessorEpoch(currEpoch, step); + if (seenEpochs.find(successorEpoch) == seenEpochs.end()) { + seenEpochs.insert(successorEpoch); + dfsStack.push_back(std::move(successorEpoch)); + } + } + result.push_back(std::move(currEpoch)); + } + + return result; } template typename MultiDimensionalRewardUnfolding::EpochModel const& MultiDimensionalRewardUnfolding::getModelForEpoch(Epoch const& epoch) { - return EpochModel(); + EpochClass classOfEpoch = getClassOfEpoch(epoch); + auto findRes = epochModels.find(classOfEpoch); + if (findRes != epochModels.end()) { + return findRes->second; + } else { + return epochModels.insert(std::make_pair(classOfEpoch, computeModelForEpoch(epoch))).first->second; + } } - + + + + template + typename MultiDimensionalRewardUnfolding::EpochModel MultiDimensionalRewardUnfolding::computeModelForEpoch(Epoch const& epoch) { + + storm::storage::MemoryStructure memory = computeMemoryStructureForEpoch(epoch); + + + } + + template + storm::storage::MemoryStructure MultiDimensionalRewardUnfolding::computeMemoryStructureForEpoch(Epoch const& epoch) const { + + // Create a memory structure that remembers whether subobjectives are satisfied + storm::storage::MemoryStructure memory = storm::storage::MemoryStructureBuilder::buildTrivialMemoryStructure(model); + for (uint64_t dim = 0 ; dim < subObjectives.size(); ++dim) { + auto const& subObj = subObjectives[dim]; + if (subObj.first->isBoundedUntilFormula()) { + // Create a memory structure that stores whether a non-PhiState or a PsiState has already been reached + storm::storage::MemoryStructureBuilder subObjMemBuilder(2, model); + std::string memLabel = "obj" + std::to_string(subObj.second) + "-" + std::to_string(dim) + "_relevant"; + while (model.getStateLabeling().containsLabel(memLabel) || memory.getStateLabeling().containsLabel(memLabel)) { + memLabel = "_" + memLabel; + } + subObjMemBuilder.setLabel(0, memLabel); + + if (epoch[dim] >= 0) { + storm::modelchecker::SparsePropositionalModelChecker> mc(model); + storm::storage::BitVector rightSubformulaResult = mc.check(subObj.first->asBoundedUntilFormula().getRightSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector(); + storm::storage::BitVector leftSubformulaResult = mc.check(subObj.first->asBoundedUntilFormula().getLeftSubformula())->asExplicitQualitativeCheckResult().getTruthValuesVector(); + + // TODO ?? + // Check if the formula is already satisfied in the initial state + // STORM_LOG_THROW((data.originalModel.getInitialStates() & rightSubformulaResult).empty(), storm::exceptions::NotImplementedException, "The Probability for the objective " << *data.objectives.back()->originalFormula << " is always one as the rhs of the until formula is true in the initial state. This (trivial) case is currently not implemented."); + + storm::storage::BitVector nonRelevantStates = ~leftSubformulaResult | rightSubformulaResult; + subObjMemBuilder.setTransition(0, 0, ~nonRelevantStates); + subObjMemBuilder.setTransition(0, 1, nonRelevantStates); + subObjMemBuilder.setTransition(1, 1, storm::storage::BitVector(model.getNumberOfStates(), true)); + for (auto const& initState : model.getInitialStates()) { + subObjMemBuilder.setInitialMemoryState(initState, nonRelevantStates.get(initState) ? 1 : 0); + } + } else { + subObjMemBuilder.setTransition(0, 0, storm::storage::BitVector(model.getNumberOfStates(), true)); + subObjMemBuilder.setTransition(1, 1, storm::storage::BitVector(model.getNumberOfStates(), true)); + } + storm::storage::MemoryStructure subObjMem = subObjMemBuilder.build(); + memory = memory.product(subObjMem); + } + } + + return memory; + } + + template void MultiDimensionalRewardUnfolding::setEpochSolution(Epoch const& epoch, EpochSolution const& solution) { - + epochSolutions.emplace(epoch, solution); } template void MultiDimensionalRewardUnfolding::setEpochSolution(Epoch const& epoch, EpochSolution&& solution) { - + epochSolutions.emplace(epoch, std::move(solution)); } template typename MultiDimensionalRewardUnfolding::EpochSolution const& MultiDimensionalRewardUnfolding::getEpochSolution(Epoch const& epoch) { - return EpochSolution(); + return epochSolutions.at(epoch); } template - typename MultiDimensionalRewardUnfolding::EpochClass MultiDimensionalRewardUnfolding::getClassOfEpoch(Epoch const& epoch) { - return 0; - } - - - /* - template - void SparseMdpPcaaWeightVectorChecker::discretizeRewardBounds() { - - std::map rewardModelNameToDimension; - objIndexToRewardDimensionMapping = std::vector(this->objectives.size(), std::numeric_limits::max()); - - // Collect the reward models that occur as reward bound - uint_fast64_t dimensionCount = 0; - for (uint_fast64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) { - auto const& obj = this->objectives[objIndex]; - if (obj.timeBoundReference && obj.timeBoundReference->isRewardBound()) { - auto insertionRes = rewardModelNameToDimension.insert(std::make_pair(obj.timeBoundReference->getRewardName(), dimensionCount)); - objIndexToRewardDimensionMapping[objIndex] = insertionRes.first->second; - if (insertionRes.second) { - ++dimensionCount; - } + typename MultiDimensionalRewardUnfolding::EpochClass MultiDimensionalRewardUnfolding::getClassOfEpoch(Epoch const& epoch) const { + // Get a BitVector that is 1 wherever the epoch is non-negative + storm::storage::BitVector classAsBitVector(epoch.size(), false); + uint64_t i = 0; + for (auto const& e : epoch) { + if (e >= 0) { + classAsBitVector.set(i, true); } + ++i; } - - // Now discretize each reward - discretizedActionRewards.reserve(dimensionCount); - discretizedRewardBounds = std::vector(this->objectives.size(), 0); - for (auto const& rewardName : rewardModelNameToDimension) { - STORM_LOG_THROW(this->model.hasRewardModel(rewardName.first), storm::exceptions::IllegalArgumentException, "No reward model with name '" << rewardName.first << "' found."); - auto const& rewardModel = this->model.getRewardModel(rewardName.first); - STORM_LOG_THROW(!rewardModel.hasTransitionRewards(), storm::exceptions::NotSupportedException, "Transition rewards are currently not supported."); - std::vector actionRewards = rewardModel.getTotalRewardVector(this->model.getTransitionMatrix()); - auto discretizedRewardsAndFactor = storm::utility::vector::toIntegralVector(actionRewards); - - discretizedActionRewards.push_back(std::move(discretizedRewardsAndFactor.first)); - // Find all objectives that reffer to this reward model and apply the discretization factor to the reward bounds - for (uint_fast64_t objIndex = 0; objIndex < objIndexToRewardDimensionMapping.size(); ++objIndex) { - if (objIndexToRewardDimensionMapping[objIndex] == rewardName.second) { - auto const& obj = this->objectives[objIndex]; - STORM_LOG_THROW(!obj.lowerTimeBound, storm::exceptions::NotSupportedException, "Lower time bounds are not supported."); - STORM_LOG_THROW(obj.upperTimeBound, storm::exceptions::UnexpectedException, "Got formula with a bound reference but no bound."); - STORM_LOG_THROW(!obj.upperTimeBound->getBound().containsVariables(), storm::exceptions::UnexpectedException, "The upper bound of a reward bounded formula still contains variables."); - typename RewardModelType::ValueType discretizedBound = storm::utility::convertNumber(obj.upperTimeBound->getBound().evaluateAsRational()); - discretizedBound /= discretizedRewardsAndFactor.second; - if (obj.upperTimeBound->isStrict() && discretizedBound == storm::utility::floor(discretizedBound)) { - discretizedBound = storm::utility::floor(discretizedBound) - storm::utility::one(); - } else { - discretizedBound = storm::utility::floor(discretizedBound); - } - discretizedRewardBounds[objIndex] = storm::utility::convertNumber(discretizedBound); - } - } + return classAsBitVector.getAsInt(0, epoch.size()); + } + + template + typename MultiDimensionalRewardUnfolding::Epoch MultiDimensionalRewardUnfolding::getSuccessorEpoch(Epoch const& epoch, Epoch const& step) const { + assert(epoch.size() == step.size()); + Epoch result; + result.reserve(epoch.size()); + auto stepIt = step.begin(); + for (auto const& e : epoch) { + result.push_back(std::max((int64_t) -1, e - *stepIt)); + ++stepIt; } + return result; } - */ + template class MultiDimensionalRewardUnfolding; template class MultiDimensionalRewardUnfolding; diff --git a/src/storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.h b/src/storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.h index d38663a69..b7e3dafd2 100644 --- a/src/storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.h +++ b/src/storm/modelchecker/multiobjective/rewardbounded/MultiDimensionalRewardUnfolding.h @@ -6,6 +6,7 @@ #include "storm/modelchecker/multiobjective/Objective.h" #include "storm/models/sparse/Mdp.h" #include "storm/utility/vector.h" +#include "storm/storage/memorystructure/MemoryStructure.h" namespace storm { namespace modelchecker { @@ -28,7 +29,7 @@ namespace storm { std::vector> objectiveValues; }; - typedef std::vector Epoch; // The number of reward steps that are "left" for each dimension + typedef std::vector Epoch; // The number of reward steps that are "left" for each dimension typedef uint64_t EpochClass; // Collection of epochs that consider the same epoch model /* @@ -54,7 +55,13 @@ namespace storm { EpochSolution const& getEpochSolution(Epoch const& epoch); private: - EpochClass getClassOfEpoch(Epoch const& epoch); + + void initialize(); + EpochClass getClassOfEpoch(Epoch const& epoch) const; + Epoch getSuccessorEpoch(Epoch const& epoch, Epoch const& step) const; + + EpochModel computeModelForEpoch(Epoch const& epoch); + storm::storage::MemoryStructure computeMemoryStructureForEpoch(Epoch const& epoch) const; storm::models::sparse::Mdp const& model;