From e02ea57a58772057925542b53237d36790493553 Mon Sep 17 00:00:00 2001
From: Alexander Bork <alexander.bork@rwth-aachen.de>
Date: Fri, 24 Jan 2020 17:08:46 +0100
Subject: [PATCH] Rudimentary version of the refinement loop

---
 .../ApproximatePOMDPModelchecker.cpp          | 112 +++++++++++++-----
 .../ApproximatePOMDPModelchecker.h            |  30 +++--
 2 files changed, 104 insertions(+), 38 deletions(-)
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 84c055d1f..ab14f7349 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -56,7 +56,7 @@ namespace storm {
                 std::unique_ptr<storm::modelchecker::CheckResult> underlyingRes(storm::api::verifyWithSparseEngine<ValueType>(underlyingModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
                 STORM_LOG_ASSERT(underlyingRes, "Result not exist.");
                 underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true)));
-                auto overApproxMap = underlyingRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
+                auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
                 underlyingWatch.stop();
 
                 storm::utility::Stopwatch positionalWatch(true);
@@ -80,6 +80,8 @@ namespace storm {
                 auto underApproxMap = underapproxRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
                 positionalWatch.stop();
 
+                STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // "
+                                                       << underApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl)
                 STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl)
 
                 // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution.
@@ -90,10 +92,65 @@ namespace storm {
                 auto underRes = storm::utility::zero<ValueType>();
                 uint64_t underApproxModelSize = 50;
                 uint64_t refinementCounter = 1;
-                std::unique_ptr<POMDPCheckResult<ValueType>> res = computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold,
-                                                                                          overApproxMap, underApproxMap, underApproxModelSize);
-                // TODO the actual refinement
-                return res;
+                std::unique_ptr<RefinementComponents<ValueType>> res;
+                while (refinementCounter < 30) {
+                    res = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold,
+                                                     initialOverApproxMap, underApproxMap, underApproxModelSize);
+                    // TODO the actual refinement
+                    // choose which observation(s) to refine
+                    std::vector<ValueType> obsAccumulator(pomdp.getNrObservations(), storm::utility::zero<ValueType>());
+                    std::vector<uint64_t> beliefCount(pomdp.getNrObservations(), 0);
+                    bsmap_type::right_map::const_iterator iter = res->underApproxBeliefStateMap.right.begin();
+                    while (iter != res->underApproxBeliefStateMap.right.end()) {
+                        auto currentBelief = res->beliefList[iter->second];
+                        beliefCount[currentBelief.observation] += 1;
+                        //TODO rename, this is getting confusing
+                        bsmap_type::left_const_iterator it = res->overApproxBeliefStateMap.left.find(iter->second);
+                        if (it != res->overApproxBeliefStateMap.left.end()) {
+                            // If there is an over-approximate value for the belief, use it
+                            auto diff = res->overApproxMap[it->second] - res->underApproxMap[iter->first];
+                            obsAccumulator[currentBelief.observation] += diff;
+                        } else {
+                            //otherwise, we approximate a value TODO this is critical, we have to think about it
+                            auto overApproxValue = storm::utility::zero<ValueType>();
+                            auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation]);
+                            auto subSimplex = temp.first;
+                            auto lambdas = temp.second;
+                            for (size_t j = 0; j < lambdas.size(); ++j) {
+                                if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
+                                    uint64_t approxId = getBeliefIdInVector(res->beliefList, currentBelief.observation, subSimplex[j]);
+                                    bsmap_type::left_const_iterator approxIter = res->overApproxBeliefStateMap.left.find(approxId);
+                                    if (approxIter != res->overApproxBeliefStateMap.left.end()) {
+                                        overApproxValue += lambdas[j] * res->overApproxMap[approxIter->second];
+                                    } else {
+                                        overApproxValue += lambdas[j];
+                                    }
+                                }
+                            }
+                            obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[iter->first];
+                        }
+                        ++iter;
+                    }
+
+                    for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
+                        obsAccumulator[i] /= beliefCount[i];
+                    }
+
+                    //TODO think about some other scoring methods
+                    auto maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end());
+                    STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
+                    STORM_PRINT("   Obs   |  Score  " << std::endl << "---------|---------" << std::endl)
+                    for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
+                        STORM_PRINT(i << "        |" << obsAccumulator[i] << std::endl)
+                        if (obsAccumulator[i] == maxAvgDifference) {
+                            observationResolutionVector[i] *= 2;
+                        }
+                    }
+                    underApproxModelSize += 10;
+                    ++refinementCounter;
+                }
+
+                return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{res->overApproxValue, res->underApproxValue});
             }
 
             template<typename ValueType, typename RewardModelType>
@@ -417,21 +474,22 @@ namespace storm {
 
                 STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl)
                 //auto underApprox = weightedSumUnderMap[initialBelief.id];
-                auto underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize);
+                auto underApproxComponents = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards,
+                                                                       maxUaModelSize);
                 STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
-                STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
-
-                // Transfer the underapproximation results from the belief id space to the MDP id space
-                std::map<uint64_t, ValueType> underApproxResultMap = {{0, storm::utility::zero<ValueType>()},
-                                                                      {1, storm::utility::one<ValueType>()}};
-                for (auto const &belief : beliefGrid) {
-                    underApproxResultMap[beliefStateMap.left.at(belief.id)] = weightedSumUnderMap[belief.id];
-                }
+                STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
 
                 return std::make_unique<RefinementComponents<ValueType>>(
-                        RefinementComponents<ValueType>{modelPtr, overApprox, underApprox, overApproxResultMap, underApproxResultMap, beliefList, beliefIsTarget, beliefStateMap});
+                        RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
+                                                        underApproxComponents->underApproxMap, beliefList, beliefIsTarget, beliefStateMap,
+                                                        underApproxComponents->underApproxBeliefStateMap});
             }
 
+/*
+            template<typename ValueType, typename RewardModelType>
+            std::unique_ptr<RefinementComponents<ValueType>>
+            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeRefinementStep(){}*/
+
             template<typename ValueType, typename RewardModelType>
             ValueType
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::overApproximationValueIteration(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -499,8 +557,8 @@ namespace storm {
                                 }
                                 // Update the selected actions
                                 if ((min && cc.isLess(storm::utility::zero<ValueType>(), chosenValue - currentValue)) ||
-                                    (!min && cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
-                                    cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
+                                        (!min && cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
+                                        cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
                                     chosenValue = currentValue;
                                     if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
                                         chosenActionIndices.clear();
@@ -697,7 +755,7 @@ namespace storm {
             }
 
             template<typename ValueType, typename RewardModelType>
-            ValueType
+            std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                                                                 std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
                                                                                                 std::vector<bool> &beliefIsTarget,
@@ -706,13 +764,13 @@ namespace storm {
                                                                                                 bool computeRewards, uint64_t maxModelSize) {
                 std::set<uint64_t> visitedBelieves;
                 std::deque<uint64_t> believesToBeExpanded;
-                std::map<uint64_t, uint64_t> beliefStateMap;
+                bsmap_type beliefStateMap;
                 std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions = {{{{0, storm::utility::one<ValueType>()}}},
                                                                                        {{{1, storm::utility::one<ValueType>()}}}};
                 std::vector<uint64_t> targetStates = {1};
 
                 uint64_t stateId = 2;
-                beliefStateMap[initialBeliefId] = stateId;
+                beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, stateId));
                 ++stateId;
                 uint64_t nextId = beliefList.size();
                 uint64_t counter = 0;
@@ -727,8 +785,8 @@ namespace storm {
                     // for targets, we only consider one action with one transition
                     if (beliefIsTarget[currentBeliefId]) {
                         // add a self-loop to target states 
-                        targetStates.push_back(beliefStateMap[currentBeliefId]);
-                        transitions.push_back({{{beliefStateMap[currentBeliefId], storm::utility::one<ValueType>()}}});
+                        targetStates.push_back(beliefStateMap.left.at(currentBeliefId));
+                        transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one<ValueType>()}}});
                     } else if (counter > maxModelSize) {
                         transitions.push_back({{{0, storm::utility::one<ValueType>()}}});
                     } else {
@@ -745,12 +803,12 @@ namespace storm {
                                                                                            observation, nextId);
                                 nextId = beliefList.size();
                                 if (visitedBelieves.insert(nextBeliefId).second) {
-                                    beliefStateMap[nextBeliefId] = stateId;
+                                    beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId));
                                     ++stateId;
                                     believesToBeExpanded.push_back(nextBeliefId);
                                     ++counter;
                                 }
-                                transitionsInStateWithAction[beliefStateMap[nextBeliefId]] = iter->second;
+                                transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second;
                             }
                             actionTransitionStorage.push_back(transitionsInStateWithAction);
                         }
@@ -776,7 +834,7 @@ namespace storm {
                 storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(modelComponents);
                 if (computeRewards) {
                     storm::models::sparse::StandardRewardModel<ValueType> rewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
-                    for (auto const &iter : beliefStateMap) {
+                    for (auto const &iter : beliefStateMap.left) {
                         auto currentBelief = beliefList[iter.first];
                         auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
                         for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) {
@@ -808,9 +866,9 @@ namespace storm {
                 STORM_LOG_ASSERT(res, "Result does not exist.");
                 res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true)));
                 auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                auto underApprox = underApproxResultMap[beliefStateMap[initialBeliefId]];
+                auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)];
 
-                return underApprox;
+                return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
             }
 
 
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 867f9d673..5e8a95025 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -11,7 +11,6 @@ namespace storm {
     namespace pomdp {
         namespace modelchecker {
             typedef boost::bimap<uint64_t, uint64_t> bsmap_type;
-            typedef boost::bimap<std::pair<uint64_t, uint64_t>, uint64_t> uamap_type;
 
             template<class ValueType>
             struct POMDPCheckResult {
@@ -28,11 +27,19 @@ namespace storm {
                 std::shared_ptr<storm::models::sparse::Model<ValueType, RewardModelType>> overApproxModelPtr;
                 ValueType overApproxValue;
                 ValueType underApproxValue;
-                std::map<uint64_t, ValueType> &overApproxMap;
-                std::map<uint64_t, ValueType> &underApproxMap;
-                std::vector<storm::pomdp::Belief<ValueType>> &beliefList;
-                std::vector<bool> &beliefIsTarget;
-                bsmap_type &beliefStateMap;
+                std::map<uint64_t, ValueType> overApproxMap;
+                std::map<uint64_t, ValueType> underApproxMap;
+                std::vector<storm::pomdp::Belief<ValueType>> beliefList;
+                std::vector<bool> beliefIsTarget;
+                bsmap_type overApproxBeliefStateMap;
+                bsmap_type underApproxBeliefStateMap;
+            };
+
+            template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
+            struct UnderApproxComponents {
+                ValueType underApproxValue;
+                std::map<uint64_t, ValueType> underApproxMap;
+                bsmap_type underApproxBeliefStateMap;
             };
 
             template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
@@ -170,11 +177,12 @@ namespace storm {
                  * @param maxModelSize
                  * @return
                  */
-                ValueType computeUnderapproximation(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                                    std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
-                                                    std::vector<bool> &beliefIsTarget,
-                                                    std::set<uint32_t> const &targetObservations,
-                                                    uint64_t initialBeliefId, bool min, bool computeReward, uint64_t maxModelSize);
+                std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                                                                             std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                                                                                                             std::vector<bool> &beliefIsTarget,
+                                                                                                             std::set<uint32_t> const &targetObservations,
+                                                                                                             uint64_t initialBeliefId, bool min, bool computeReward,
+                                                                                                             uint64_t maxModelSize);
 
                 /**
                  *