From 31f8ec98e49842f672e77951b51bd25f68d55e11 Mon Sep 17 00:00:00 2001
From: TimQu <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 10 Oct 2017 17:32:37 +0200
Subject: [PATCH] implemented the pomdp unfolding to convert k-memory-bounded
 pomdps to memoryless pomdps

---
 src/storm-pomdp-cli/storm-pomdp.cpp           |  85 ++++++----
 .../transformer/PomdpMemoryUnfolder.cpp       | 147 ++++++++++++++++++
 .../transformer/PomdpMemoryUnfolder.h         |  37 +++++
 3 files changed, 235 insertions(+), 34 deletions(-)
 create mode 100644 src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp
 create mode 100644 src/storm-pomdp/transformer/PomdpMemoryUnfolder.h

diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp
index 30ead921e..6af84b76e 100644
--- a/src/storm-pomdp-cli/storm-pomdp.cpp
+++ b/src/storm-pomdp-cli/storm-pomdp.cpp
@@ -36,6 +36,7 @@
 #include "storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.h"
 #include "storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.h"
 #include "storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.h"
+#include "storm-pomdp/transformer/PomdpMemoryUnfolder.h"
 #include "storm-pomdp/analysis/UniqueObservationStates.h"
 #include "storm-pomdp/analysis/QualitativeAnalysis.h"
 
@@ -112,45 +113,61 @@ int main(const int argc, const char** argv) {
             STORM_LOG_WARN_COND(symbolicInput.properties.size() == 1, "There is currently no support for multiple properties. All other properties will be ignored.");
         }
         
-        STORM_PRINT_AND_LOG("Analyzing states with unique observation ..." << std::endl);
-        storm::analysis::UniqueObservationStates<storm::RationalNumber> uniqueAnalysis(*pomdp);
-        std::cout << uniqueAnalysis.analyse() << std::endl;
-
-        // CHECK if prop maximizes, only apply in those situations
-        STORM_PRINT_AND_LOG("Eliminating self-loop choices ...");
-        uint64_t oldChoiceCount = pomdp->getNumberOfChoices();
-        storm::transformer::GlobalPOMDPSelfLoopEliminator<storm::RationalNumber> selfLoopEliminator(*pomdp);
-        pomdp = selfLoopEliminator.transform();
-        STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl);
+        if (pomdpSettings.isAnalyzeUniqueObservationsSet()) {
+            STORM_PRINT_AND_LOG("Analyzing states with unique observation ..." << std::endl);
+            storm::analysis::UniqueObservationStates<storm::RationalNumber> uniqueAnalysis(*pomdp);
+            std::cout << uniqueAnalysis.analyse() << std::endl;
+        }
 
+        
         if (formula) {
             if (formula->isProbabilityOperatorFormula()) {
-                storm::analysis::QualitativeAnalysis<storm::RationalNumber> qualitativeAnalysis(*pomdp);
-                STORM_PRINT_AND_LOG("Computing states with probability 0 ...");
-                std::cout << qualitativeAnalysis.analyseProb0(formula->asProbabilityOperatorFormula()) << std::endl;
-                STORM_PRINT_AND_LOG("Computing states with probability 1 ...");
-                std::cout << qualitativeAnalysis.analyseProb1(formula->asProbabilityOperatorFormula()) << std::endl;
-            }
-            STORM_PRINT_AND_LOG("Eliminating mec choices ...");
-            // Todo elimination of mec choices only preserves memoryless schedulers. Selfloop elimination becomes redundant if we do mecChoiceElimination
-            oldChoiceCount = pomdp->getNumberOfChoices();
-            storm::transformer::GlobalPomdpMecChoiceEliminator<storm::RationalNumber> mecChoiceEliminator(*pomdp);
-            pomdp = mecChoiceEliminator.transform(*formula);
-            STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through MEC choice elimination." << std::endl);
-        }
+                if (pomdpSettings.isSelfloopReductionSet() && !storm::solver::minimize(formula->asProbabilityOperatorFormula().getOptimalityType())) {
+                    STORM_PRINT_AND_LOG("Eliminating self-loop choices ...");
+                    uint64_t oldChoiceCount = pomdp->getNumberOfChoices();
+                    storm::transformer::GlobalPOMDPSelfLoopEliminator<storm::RationalNumber> selfLoopEliminator(*pomdp);
+                    pomdp = selfLoopEliminator.transform();
+                    STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl);
+                }
+                if (pomdpSettings.isQualitativeReductionSet()) {
+                    storm::analysis::QualitativeAnalysis<storm::RationalNumber> qualitativeAnalysis(*pomdp);
+                    STORM_PRINT_AND_LOG("Computing states with probability 0 ...");
+                    std::cout << qualitativeAnalysis.analyseProb0(formula->asProbabilityOperatorFormula()) << std::endl;
+                    STORM_PRINT_AND_LOG("Computing states with probability 1 ...");
+                    std::cout << qualitativeAnalysis.analyseProb1(formula->asProbabilityOperatorFormula()) << std::endl;
+                    std::cout << "actual reduction not yet implemented..." << std::endl;
+                }
+                if (pomdpSettings.getMemoryBound() > 1) {
+                    storm::transformer::PomdpMemoryUnfolder<storm::RationalNumber> memoryUnfolder(*pomdp, pomdpSettings.getMemoryBound());
+                    pomdp = memoryUnfolder.transform();
+                }
+                
+                // From now on the pomdp is considered memoryless
+                
+                if (pomdpSettings.isMecReductionSet()) {
+                    STORM_PRINT_AND_LOG("Eliminating mec choices ...");
+                    // Note: Elimination of mec choices only preserves memoryless schedulers.
+                    uint64_t oldChoiceCount = pomdp->getNumberOfChoices();
+                    storm::transformer::GlobalPomdpMecChoiceEliminator<storm::RationalNumber> mecChoiceEliminator(*pomdp);
+                    pomdp = mecChoiceEliminator.transform(*formula);
+                    STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through MEC choice elimination." << std::endl);
+                }
+                
+                if (pomdpSettings.isExportToParametricSet()) {
+                    storm::transformer::ApplyFiniteSchedulerToPomdp<storm::RationalNumber> toPMCTransformer(*pomdp);
+                    auto const &pmc = toPMCTransformer.transform();
+                    storm::analysis::ConstraintCollector<storm::RationalFunction> constraints(*pmc);
+                    pmc->printModelInformationToStream(std::cout);
+                    auto const& parameterSet = constraints.getVariables();
+                    std::vector<storm::RationalFunctionVariable> parameters(parameterSet.begin(), parameterSet.end());
+                    std::vector<std::string> parameterNames;
+                    for (auto const& parameter : parameters) {
+                        parameterNames.push_back(parameter.getName());
+                    }
+                    storm::api::exportSparseModelAsDrn(pmc, pomdpSettings.getExportToParametricFilename(), parameterNames);
+                }
 
-        if (pomdpSettings.isExportToParametricSet()) {
-            storm::transformer::ApplyFiniteSchedulerToPomdp<storm::RationalNumber> toPMCTransformer(*pomdp);
-            auto const &pmc = toPMCTransformer.transform();
-            storm::analysis::ConstraintCollector<storm::RationalFunction> constraints(*pmc);
-            pmc->printModelInformationToStream(std::cout);
-            auto const& parameterSet = constraints.getVariables();
-            std::vector<storm::RationalFunctionVariable> parameters(parameterSet.begin(), parameterSet.end());
-            std::vector<std::string> parameterNames;
-            for (auto const& parameter : parameters) {
-                parameterNames.push_back(parameter.getName());
             }
-            storm::api::exportSparseModelAsDrn(pmc, pomdpSettings.getExportToParametricFilename(), parameterNames);
         }
 
         // All operations have now been performed, so we clean up everything and terminate.
diff --git a/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp b/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp
new file mode 100644
index 000000000..bc1a47494
--- /dev/null
+++ b/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp
@@ -0,0 +1,147 @@
+#include <storm/exceptions/NotSupportedException.h>
+#include "storm-pomdp/transformer/PomdpMemoryUnfolder.h"
+#include "storm/storage/sparse/ModelComponents.h"
+
+#include "storm/exceptions/NotSupportedException.h"
+
+namespace storm {
+    namespace transformer {
+
+            
+            template<typename ValueType>
+            PomdpMemoryUnfolder<ValueType>::PomdpMemoryUnfolder(storm::models::sparse::Pomdp<ValueType> const& pomdp, uint64_t numMemoryStates) : pomdp(pomdp), numMemoryStates(numMemoryStates) {
+                // intentionally left empty
+            }
+
+            
+            template<typename ValueType>
+            std::shared_ptr<storm::models::sparse::Pomdp<ValueType>> PomdpMemoryUnfolder<ValueType>::transform() const {
+                storm::storage::sparse::ModelComponents<ValueType> components;
+                components.transitionMatrix = transformTransitions();
+                components.stateLabeling = transformStateLabeling();
+                components.observabilityClasses = transformObservabilityClasses();
+                for (auto const& rewModel : pomdp.getRewardModels()) {
+                    components.rewardModels.emplace(rewModel.first, transformRewardModel(rewModel.second));
+                }
+                
+                return std::make_shared<storm::models::sparse::Pomdp<ValueType>>(std::move(components));
+            }
+        
+        
+            template<typename ValueType>
+            storm::storage::SparseMatrix<ValueType> PomdpMemoryUnfolder<ValueType>::transformTransitions() const {
+                storm::storage::SparseMatrix<ValueType> const& origTransitions = pomdp.getTransitionMatrix();
+                storm::storage::SparseMatrixBuilder<ValueType> builder(pomdp.getNumberOfStates() * numMemoryStates * numMemoryStates,
+                                                                        pomdp.getNumberOfStates() * numMemoryStates,
+                                                                        origTransitions.getEntryCount() * numMemoryStates * numMemoryStates,
+                                                                        true,
+                                                                        false,
+                                                                        pomdp.getNumberOfStates() * numMemoryStates);
+                
+                uint64_t row = 0;
+                for (uint64_t modelState = 0; modelState < pomdp.getNumberOfStates(); ++modelState) {
+                    for (uint32_t memState = 0; memState < numMemoryStates; ++memState) {
+                        builder.newRowGroup(row);
+                        for (uint64_t origRow = origTransitions.getRowGroupIndices()[modelState]; origRow < origTransitions.getRowGroupIndices()[modelState + 1]; ++origRow) {
+                            for (uint32_t memStatePrime = 0; memStatePrime < numMemoryStates; ++memStatePrime) {
+                                for (auto const& entry : origTransitions.getRow(origRow)) {
+                                    builder.addNextValue(row, getUnfoldingState(entry.getColumn(), memStatePrime), entry.getValue());
+                                }
+                                ++row;
+                            }
+                        }
+                    }
+                }
+                return builder.build();
+            }
+        
+            template<typename ValueType>
+            storm::models::sparse::StateLabeling PomdpMemoryUnfolder<ValueType>::transformStateLabeling() const {
+                storm::models::sparse::StateLabeling labeling(pomdp.getNumberOfStates() * numMemoryStates);
+                for (auto const& labelName : pomdp.getStateLabeling().getLabels()) {
+                    storm::storage::BitVector newStates(pomdp.getNumberOfStates() * numMemoryStates, false);
+                    for (auto const& modelState : pomdp.getStateLabeling().getStates(labelName)) {
+                        for (uint32_t memState = 0; memState < numMemoryStates; ++memState) {
+                            newStates.set(getUnfoldingState(modelState, memState));
+                        }
+                    }
+                    labeling.addLabel(labelName, std::move(newStates));
+                }
+                return labeling;
+            }
+        
+            template<typename ValueType>
+            std::vector<uint32_t> PomdpMemoryUnfolder<ValueType>::transformObservabilityClasses() const {
+                std::vector<uint32_t> observations;
+                observations.reserve(pomdp.getNumberOfStates() * numMemoryStates);
+                for (uint64_t modelState = 0; modelState < pomdp.getNumberOfStates(); ++modelState) {
+                    for (uint32_t memState = 0; memState < numMemoryStates; ++memState) {
+                        observations.push_back(getUnfoldingObersvation(pomdp.getObservation(modelState), memState));
+                    }
+                }
+                return observations;
+            }
+        
+            template<typename ValueType>
+            storm::models::sparse::StandardRewardModel<ValueType> PomdpMemoryUnfolder<ValueType>::transformRewardModel(storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel) const {
+                boost::optional<std::vector<ValueType>> stateRewards, actionRewards;
+                if (rewardModel.hasStateRewards()) {
+                    stateRewards = std::vector<ValueType>();
+                    stateRewards->reserve(pomdp.getNumberOfStates() * numMemoryStates);
+                    for (auto const& stateReward : rewardModel.getStateRewardVector()) {
+                        for (uint32_t memState = 0; memState < numMemoryStates; ++memState) {
+                            stateRewards->push_back(stateReward);
+                        }
+                    }
+                }
+                if (rewardModel.hasStateActionRewards()) {
+                    actionRewards = std::vector<ValueType>();
+                    stateRewards->reserve(pomdp.getNumberOfStates() * numMemoryStates * numMemoryStates);
+                    for (uint64_t modelState = 0; modelState < pomdp.getNumberOfStates(); ++modelState) {
+                        for (uint32_t memState = 0; memState < numMemoryStates; ++memState) {
+                            for (uint64_t origRow = pomdp.getTransitionMatrix().getRowGroupIndices()[modelState]; origRow < pomdp.getTransitionMatrix().getRowGroupIndices()[modelState + 1]; ++origRow) {
+                                ValueType const& actionReward = rewardModel.getStateActionReward(origRow);
+                                for (uint32_t memStatePrime = 0; memStatePrime < numMemoryStates; ++memStatePrime) {
+                                    actionRewards->push_back(actionReward);
+                                }
+                            }
+                        }
+                    }
+                }
+                STORM_LOG_THROW(rewardModel.hasTransitionRewards(), storm::exceptions::NotSupportedException, "Transition rewards are currently not supported.");
+                return storm::models::sparse::StandardRewardModel<ValueType>(std::move(stateRewards), std::move(actionRewards));
+            }
+
+            template<typename ValueType>
+            uint64_t PomdpMemoryUnfolder<ValueType>::getUnfoldingState(uint64_t modelState, uint32_t memoryState) const {
+                return modelState * numMemoryStates + memoryState;
+            }
+            
+            template<typename ValueType>
+            uint64_t PomdpMemoryUnfolder<ValueType>::getModelState(uint64_t unfoldingState) const {
+                return unfoldingState / numMemoryStates;
+            }
+            
+            template<typename ValueType>
+            uint32_t PomdpMemoryUnfolder<ValueType>::getMemoryState(uint64_t unfoldingState) const {
+                return unfoldingState % numMemoryStates;
+            }
+            
+            template<typename ValueType>
+            uint32_t PomdpMemoryUnfolder<ValueType>::getUnfoldingObersvation(uint32_t modelObservation, uint32_t memoryState) const {
+                return modelObservation * numMemoryStates + memoryState;
+            }
+            
+            template<typename ValueType>
+            uint32_t PomdpMemoryUnfolder<ValueType>::getModelObersvation(uint32_t unfoldingObservation) const {
+                return unfoldingObservation / numMemoryStates;
+            }
+            
+            template<typename ValueType>
+            uint32_t PomdpMemoryUnfolder<ValueType>::getMemoryStateFromObservation(uint32_t unfoldingObservation) const {
+                return unfoldingObservation % numMemoryStates;
+            }
+
+            template class PomdpMemoryUnfolder<storm::RationalNumber>;
+    }
+}
\ No newline at end of file
diff --git a/src/storm-pomdp/transformer/PomdpMemoryUnfolder.h b/src/storm-pomdp/transformer/PomdpMemoryUnfolder.h
new file mode 100644
index 000000000..8b72e64a5
--- /dev/null
+++ b/src/storm-pomdp/transformer/PomdpMemoryUnfolder.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "storm/models/sparse/Pomdp.h"
+#include "storm/models/sparse/StandardRewardModel.h"
+
+namespace storm {
+    namespace transformer {
+
+        template<typename ValueType>
+        class PomdpMemoryUnfolder {
+
+        public:
+            
+            PomdpMemoryUnfolder(storm::models::sparse::Pomdp<ValueType> const& pomdp, uint64_t numMemoryStates);
+            
+            std::shared_ptr<storm::models::sparse::Pomdp<ValueType>> transform() const;
+
+        private:
+            storm::storage::SparseMatrix<ValueType> transformTransitions() const;
+            storm::models::sparse::StateLabeling transformStateLabeling() const;
+            std::vector<uint32_t> transformObservabilityClasses() const;
+            storm::models::sparse::StandardRewardModel<ValueType> transformRewardModel(storm::models::sparse::StandardRewardModel<ValueType> const& rewardModel) const;
+            
+            uint64_t getUnfoldingState(uint64_t modelState, uint32_t memoryState) const;
+            uint64_t getModelState(uint64_t unfoldingState) const;
+            uint32_t getMemoryState(uint64_t unfoldingState) const;
+            
+            uint32_t getUnfoldingObersvation(uint32_t modelObservation, uint32_t memoryState) const;
+            uint32_t getModelObersvation(uint32_t unfoldingObservation) const;
+            uint32_t getMemoryStateFromObservation(uint32_t unfoldingObservation) const;
+            
+            
+            storm::models::sparse::Pomdp<ValueType> const& pomdp;
+            uint32_t numMemoryStates;
+        };
+    }
+}
\ No newline at end of file