From 31f8ec98e49842f672e77951b51bd25f68d55e11 Mon Sep 17 00:00:00 2001 From: TimQu Date: Tue, 10 Oct 2017 17:32:37 +0200 Subject: [PATCH] implemented the pomdp unfolding to convert k-memory-bounded pomdps to memoryless pomdps --- src/storm-pomdp-cli/storm-pomdp.cpp | 85 ++++++---- .../transformer/PomdpMemoryUnfolder.cpp | 147 ++++++++++++++++++ .../transformer/PomdpMemoryUnfolder.h | 37 +++++ 3 files changed, 235 insertions(+), 34 deletions(-) create mode 100644 src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp create mode 100644 src/storm-pomdp/transformer/PomdpMemoryUnfolder.h diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 30ead921e..6af84b76e 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -36,6 +36,7 @@ #include "storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.h" #include "storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.h" #include "storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.h" +#include "storm-pomdp/transformer/PomdpMemoryUnfolder.h" #include "storm-pomdp/analysis/UniqueObservationStates.h" #include "storm-pomdp/analysis/QualitativeAnalysis.h" @@ -112,45 +113,61 @@ int main(const int argc, const char** argv) { STORM_LOG_WARN_COND(symbolicInput.properties.size() == 1, "There is currently no support for multiple properties. All other properties will be ignored."); } - STORM_PRINT_AND_LOG("Analyzing states with unique observation ..." << std::endl); - storm::analysis::UniqueObservationStates uniqueAnalysis(*pomdp); - std::cout << uniqueAnalysis.analyse() << std::endl; - - // CHECK if prop maximizes, only apply in those situations - STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); - uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); - pomdp = selfLoopEliminator.transform(); - STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); + if (pomdpSettings.isAnalyzeUniqueObservationsSet()) { + STORM_PRINT_AND_LOG("Analyzing states with unique observation ..." << std::endl); + storm::analysis::UniqueObservationStates uniqueAnalysis(*pomdp); + std::cout << uniqueAnalysis.analyse() << std::endl; + } + if (formula) { if (formula->isProbabilityOperatorFormula()) { - storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); - STORM_PRINT_AND_LOG("Computing states with probability 0 ..."); - std::cout << qualitativeAnalysis.analyseProb0(formula->asProbabilityOperatorFormula()) << std::endl; - STORM_PRINT_AND_LOG("Computing states with probability 1 ..."); - std::cout << qualitativeAnalysis.analyseProb1(formula->asProbabilityOperatorFormula()) << std::endl; - } - STORM_PRINT_AND_LOG("Eliminating mec choices ..."); - // Todo elimination of mec choices only preserves memoryless schedulers. Selfloop elimination becomes redundant if we do mecChoiceElimination - oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPomdpMecChoiceEliminator mecChoiceEliminator(*pomdp); - pomdp = mecChoiceEliminator.transform(*formula); - STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through MEC choice elimination." << std::endl); - } + if (pomdpSettings.isSelfloopReductionSet() && !storm::solver::minimize(formula->asProbabilityOperatorFormula().getOptimalityType())) { + STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); + uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); + storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); + pomdp = selfLoopEliminator.transform(); + STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); + } + if (pomdpSettings.isQualitativeReductionSet()) { + storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); + STORM_PRINT_AND_LOG("Computing states with probability 0 ..."); + std::cout << qualitativeAnalysis.analyseProb0(formula->asProbabilityOperatorFormula()) << std::endl; + STORM_PRINT_AND_LOG("Computing states with probability 1 ..."); + std::cout << qualitativeAnalysis.analyseProb1(formula->asProbabilityOperatorFormula()) << std::endl; + std::cout << "actual reduction not yet implemented..." << std::endl; + } + if (pomdpSettings.getMemoryBound() > 1) { + storm::transformer::PomdpMemoryUnfolder memoryUnfolder(*pomdp, pomdpSettings.getMemoryBound()); + pomdp = memoryUnfolder.transform(); + } + + // From now on the pomdp is considered memoryless + + if (pomdpSettings.isMecReductionSet()) { + STORM_PRINT_AND_LOG("Eliminating mec choices ..."); + // Note: Elimination of mec choices only preserves memoryless schedulers. + uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); + storm::transformer::GlobalPomdpMecChoiceEliminator mecChoiceEliminator(*pomdp); + pomdp = mecChoiceEliminator.transform(*formula); + STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through MEC choice elimination." << std::endl); + } + + if (pomdpSettings.isExportToParametricSet()) { + storm::transformer::ApplyFiniteSchedulerToPomdp toPMCTransformer(*pomdp); + auto const &pmc = toPMCTransformer.transform(); + storm::analysis::ConstraintCollector constraints(*pmc); + pmc->printModelInformationToStream(std::cout); + auto const& parameterSet = constraints.getVariables(); + std::vector parameters(parameterSet.begin(), parameterSet.end()); + std::vector parameterNames; + for (auto const& parameter : parameters) { + parameterNames.push_back(parameter.getName()); + } + storm::api::exportSparseModelAsDrn(pmc, pomdpSettings.getExportToParametricFilename(), parameterNames); + } - if (pomdpSettings.isExportToParametricSet()) { - storm::transformer::ApplyFiniteSchedulerToPomdp toPMCTransformer(*pomdp); - auto const &pmc = toPMCTransformer.transform(); - storm::analysis::ConstraintCollector constraints(*pmc); - pmc->printModelInformationToStream(std::cout); - auto const& parameterSet = constraints.getVariables(); - std::vector parameters(parameterSet.begin(), parameterSet.end()); - std::vector parameterNames; - for (auto const& parameter : parameters) { - parameterNames.push_back(parameter.getName()); } - storm::api::exportSparseModelAsDrn(pmc, pomdpSettings.getExportToParametricFilename(), parameterNames); } // All operations have now been performed, so we clean up everything and terminate. diff --git a/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp b/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp new file mode 100644 index 000000000..bc1a47494 --- /dev/null +++ b/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp @@ -0,0 +1,147 @@ +#include +#include "storm-pomdp/transformer/PomdpMemoryUnfolder.h" +#include "storm/storage/sparse/ModelComponents.h" + +#include "storm/exceptions/NotSupportedException.h" + +namespace storm { + namespace transformer { + + + template + PomdpMemoryUnfolder::PomdpMemoryUnfolder(storm::models::sparse::Pomdp const& pomdp, uint64_t numMemoryStates) : pomdp(pomdp), numMemoryStates(numMemoryStates) { + // intentionally left empty + } + + + template + std::shared_ptr> PomdpMemoryUnfolder::transform() const { + storm::storage::sparse::ModelComponents components; + components.transitionMatrix = transformTransitions(); + components.stateLabeling = transformStateLabeling(); + components.observabilityClasses = transformObservabilityClasses(); + for (auto const& rewModel : pomdp.getRewardModels()) { + components.rewardModels.emplace(rewModel.first, transformRewardModel(rewModel.second)); + } + + return std::make_shared>(std::move(components)); + } + + + template + storm::storage::SparseMatrix PomdpMemoryUnfolder::transformTransitions() const { + storm::storage::SparseMatrix const& origTransitions = pomdp.getTransitionMatrix(); + storm::storage::SparseMatrixBuilder builder(pomdp.getNumberOfStates() * numMemoryStates * numMemoryStates, + pomdp.getNumberOfStates() * numMemoryStates, + origTransitions.getEntryCount() * numMemoryStates * numMemoryStates, + true, + false, + pomdp.getNumberOfStates() * numMemoryStates); + + uint64_t row = 0; + for (uint64_t modelState = 0; modelState < pomdp.getNumberOfStates(); ++modelState) { + for (uint32_t memState = 0; memState < numMemoryStates; ++memState) { + builder.newRowGroup(row); + for (uint64_t origRow = origTransitions.getRowGroupIndices()[modelState]; origRow < origTransitions.getRowGroupIndices()[modelState + 1]; ++origRow) { + for (uint32_t memStatePrime = 0; memStatePrime < numMemoryStates; ++memStatePrime) { + for (auto const& entry : origTransitions.getRow(origRow)) { + builder.addNextValue(row, getUnfoldingState(entry.getColumn(), memStatePrime), entry.getValue()); + } + ++row; + } + } + } + } + return builder.build(); + } + + template + storm::models::sparse::StateLabeling PomdpMemoryUnfolder::transformStateLabeling() const { + storm::models::sparse::StateLabeling labeling(pomdp.getNumberOfStates() * numMemoryStates); + for (auto const& labelName : pomdp.getStateLabeling().getLabels()) { + storm::storage::BitVector newStates(pomdp.getNumberOfStates() * numMemoryStates, false); + for (auto const& modelState : pomdp.getStateLabeling().getStates(labelName)) { + for (uint32_t memState = 0; memState < numMemoryStates; ++memState) { + newStates.set(getUnfoldingState(modelState, memState)); + } + } + labeling.addLabel(labelName, std::move(newStates)); + } + return labeling; + } + + template + std::vector PomdpMemoryUnfolder::transformObservabilityClasses() const { + std::vector observations; + observations.reserve(pomdp.getNumberOfStates() * numMemoryStates); + for (uint64_t modelState = 0; modelState < pomdp.getNumberOfStates(); ++modelState) { + for (uint32_t memState = 0; memState < numMemoryStates; ++memState) { + observations.push_back(getUnfoldingObersvation(pomdp.getObservation(modelState), memState)); + } + } + return observations; + } + + template + storm::models::sparse::StandardRewardModel PomdpMemoryUnfolder::transformRewardModel(storm::models::sparse::StandardRewardModel const& rewardModel) const { + boost::optional> stateRewards, actionRewards; + if (rewardModel.hasStateRewards()) { + stateRewards = std::vector(); + stateRewards->reserve(pomdp.getNumberOfStates() * numMemoryStates); + for (auto const& stateReward : rewardModel.getStateRewardVector()) { + for (uint32_t memState = 0; memState < numMemoryStates; ++memState) { + stateRewards->push_back(stateReward); + } + } + } + if (rewardModel.hasStateActionRewards()) { + actionRewards = std::vector(); + stateRewards->reserve(pomdp.getNumberOfStates() * numMemoryStates * numMemoryStates); + for (uint64_t modelState = 0; modelState < pomdp.getNumberOfStates(); ++modelState) { + for (uint32_t memState = 0; memState < numMemoryStates; ++memState) { + for (uint64_t origRow = pomdp.getTransitionMatrix().getRowGroupIndices()[modelState]; origRow < pomdp.getTransitionMatrix().getRowGroupIndices()[modelState + 1]; ++origRow) { + ValueType const& actionReward = rewardModel.getStateActionReward(origRow); + for (uint32_t memStatePrime = 0; memStatePrime < numMemoryStates; ++memStatePrime) { + actionRewards->push_back(actionReward); + } + } + } + } + } + STORM_LOG_THROW(rewardModel.hasTransitionRewards(), storm::exceptions::NotSupportedException, "Transition rewards are currently not supported."); + return storm::models::sparse::StandardRewardModel(std::move(stateRewards), std::move(actionRewards)); + } + + template + uint64_t PomdpMemoryUnfolder::getUnfoldingState(uint64_t modelState, uint32_t memoryState) const { + return modelState * numMemoryStates + memoryState; + } + + template + uint64_t PomdpMemoryUnfolder::getModelState(uint64_t unfoldingState) const { + return unfoldingState / numMemoryStates; + } + + template + uint32_t PomdpMemoryUnfolder::getMemoryState(uint64_t unfoldingState) const { + return unfoldingState % numMemoryStates; + } + + template + uint32_t PomdpMemoryUnfolder::getUnfoldingObersvation(uint32_t modelObservation, uint32_t memoryState) const { + return modelObservation * numMemoryStates + memoryState; + } + + template + uint32_t PomdpMemoryUnfolder::getModelObersvation(uint32_t unfoldingObservation) const { + return unfoldingObservation / numMemoryStates; + } + + template + uint32_t PomdpMemoryUnfolder::getMemoryStateFromObservation(uint32_t unfoldingObservation) const { + return unfoldingObservation % numMemoryStates; + } + + template class PomdpMemoryUnfolder; + } +} \ No newline at end of file diff --git a/src/storm-pomdp/transformer/PomdpMemoryUnfolder.h b/src/storm-pomdp/transformer/PomdpMemoryUnfolder.h new file mode 100644 index 000000000..8b72e64a5 --- /dev/null +++ b/src/storm-pomdp/transformer/PomdpMemoryUnfolder.h @@ -0,0 +1,37 @@ +#pragma once + +#include "storm/models/sparse/Pomdp.h" +#include "storm/models/sparse/StandardRewardModel.h" + +namespace storm { + namespace transformer { + + template + class PomdpMemoryUnfolder { + + public: + + PomdpMemoryUnfolder(storm::models::sparse::Pomdp const& pomdp, uint64_t numMemoryStates); + + std::shared_ptr> transform() const; + + private: + storm::storage::SparseMatrix transformTransitions() const; + storm::models::sparse::StateLabeling transformStateLabeling() const; + std::vector transformObservabilityClasses() const; + storm::models::sparse::StandardRewardModel transformRewardModel(storm::models::sparse::StandardRewardModel const& rewardModel) const; + + uint64_t getUnfoldingState(uint64_t modelState, uint32_t memoryState) const; + uint64_t getModelState(uint64_t unfoldingState) const; + uint32_t getMemoryState(uint64_t unfoldingState) const; + + uint32_t getUnfoldingObersvation(uint32_t modelObservation, uint32_t memoryState) const; + uint32_t getModelObersvation(uint32_t unfoldingObservation) const; + uint32_t getMemoryStateFromObservation(uint32_t unfoldingObservation) const; + + + storm::models::sparse::Pomdp const& pomdp; + uint32_t numMemoryStates; + }; + } +} \ No newline at end of file