Implementation of exploration stopping in refinement procedure for newly added states

6 years ago · 44fd26bd13
1 changed files with 51 additions and 40 deletions
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@ -552,10 +552,21 @@ namespace storm {
                                                                                            boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
                                                                                            boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
                                                                                            uint64_t maxUaModelSize) {
+                bool initialBoundMapsSet = overApproximationMap && underApproximationMap;
+                std::map<uint64_t, ValueType> initialOverMap;
+                std::map<uint64_t, ValueType> initialUnderMap;
+                if (initialBoundMapsSet) {
+                    initialOverMap = overApproximationMap.value();
+                    initialUnderMap = underApproximationMap.value();
+                }
                // Note that a persistent cache is not support by the current data structure. The resolution for the given belief also has to be stored somewhere to cache effectively
                std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> subSimplexCache;
                std::map<uint64_t, std::vector<ValueType>> lambdaCache;

+                // Map to save the weighted values resulting from the initial preprocessing for newly added beliefs / indices in beliefSpace
+                std::map<uint64_t, ValueType> weightedSumOverMap;
+                std::map<uint64_t, ValueType> weightedSumUnderMap;
+
                uint64_t nextBeliefId = refinementComponents->beliefList.size();
                uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates();
                std::set<uint64_t> relevantStates;
@ -622,18 +633,16 @@ namespace storm {
                                    refinementComponents->beliefGrid.push_back(gridBelief);
                                    refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
                                    // compute overapproximate value using MDP result map
-                                    //TODO do this
-                                    /*
-                                    if (boundMapsSet) {
+                                    if (initialBoundMapsSet) {
                                        auto tempWeightedSumOver = storm::utility::zero<ValueType>();
                                        auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
                                        for (uint64_t i = 0; i < subSimplex[j].size(); ++i) {
-                                            tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
-                                            tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
+                                            tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(initialOverMap[i]);
+                                            tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(initialUnderMap[i]);
+                                        }
+                                        weightedSumOverMap[nextBeliefId] = tempWeightedSumOver;
+                                        weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder;
                                    }
-                                        weightedSumOverMap[nextId] = tempWeightedSumOver;
-                                        weightedSumUnderMap[nextId] = tempWeightedSumUnder;
-                                    } */
                                    beliefsToBeExpanded.push_back(nextBeliefId);
                                    refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId));
                                    transitionInActionBelief[nextStateId] = iter->second * lambdas[j];
@ -654,17 +663,25 @@ namespace storm {
                        transitionsStateActionPair[stateActionPair] = transitionInActionBelief;
                    }
                }
+
+                std::set<uint64_t> stoppedExplorationStateSet;
+
                // Expand newly added beliefs
                while (!beliefsToBeExpanded.empty()) {
                    uint64_t currId = beliefsToBeExpanded.front();
                    beliefsToBeExpanded.pop_front();
                    bool isTarget = refinementComponents->beliefIsTarget[currId];

-                    /* TODO
-                    if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber<ValueType>(options.explorationThreshold))) {
-                        mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one<ValueType>() - weightedSumOverMap[currId]}}});
+                    if (initialBoundMapsSet &&
+                        cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber<ValueType>(options.explorationThreshold))) {
+                        STORM_PRINT("Stop Exploration in State " << refinementComponents->overApproxBeliefStateMap.left.at(currId) << " with Value " << weightedSumOverMap[currId]
+                                                                 << std::endl)
+                        transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = {{1, weightedSumOverMap[currId]},
+                                                                                                                                         {0, storm::utility::one<ValueType>() -
+                                                                                                                                             weightedSumOverMap[currId]}};
+                        stoppedExplorationStateSet.insert(refinementComponents->overApproxBeliefStateMap.left.at(currId));
                        continue;
-                    }*/
+                    }

                    if (isTarget) {
                        // Depending on whether we compute rewards, we select the right initial result
@ -690,21 +707,21 @@ namespace storm {
                                //Triangulate here and put the possibly resulting belief in the grid
                                std::vector<std::map<uint64_t, ValueType>> subSimplex;
                                std::vector<ValueType> lambdas;
-                                /* TODO Caching
+
                                if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) {
                                    subSimplex = subSimplexCache[idNextBelief];
                                    lambdas = lambdaCache[idNextBelief];
-                                } else { */
+                                } else {
                                    auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities,
                                                                            observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation],
                                                                            pomdp.getNumberOfStates());
                                    subSimplex = temp.first;
                                    lambdas = temp.second;
-                                /*if (options.cacheSubsimplices) {
+                                    if (options.cacheSubsimplices) {
                                        subSimplexCache[idNextBelief] = subSimplex;
                                        lambdaCache[idNextBelief] = lambdas;
                                    }
-                            }*/
+                                }

                                for (size_t j = 0; j < lambdas.size(); ++j) {
                                    if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
@ -716,17 +733,16 @@ namespace storm {
                                            refinementComponents->beliefGrid.push_back(gridBelief);
                                            refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
                                            // compute overapproximate value using MDP result map
-                                            /*
-                                            if (boundMapsSet) {
+                                            if (initialBoundMapsSet) {
                                                auto tempWeightedSumOver = storm::utility::zero<ValueType>();
                                                auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
                                                for (uint64_t i = 0; i < subSimplex[j].size(); ++i) {
-                                                    tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
-                                                    tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
+                                                    tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(initialOverMap[i]);
+                                                    tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(initialUnderMap[i]);
+                                                }
+                                                weightedSumOverMap[nextBeliefId] = tempWeightedSumOver;
+                                                weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder;
                                            }
-                                                weightedSumOverMap[nextId] = tempWeightedSumOver;
-                                                weightedSumUnderMap[nextId] = tempWeightedSumUnder;
-                                            } */
                                            beliefsToBeExpanded.push_back(nextBeliefId);
                                            refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId));
                                            transitionInActionBelief[nextStateId] = iter->second * lambdas[j];
@ -750,15 +766,7 @@ namespace storm {
                        /*
                        if (computeRewards) {
                            beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState));
-                        }
-
-
-                        if (transitionsInBelief.empty()) {
-                            std::map<uint64_t, ValueType> transitionInActionBelief;
-                            transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one<ValueType>();
-                            transitionsInBelief.push_back(transitionInActionBelief);
-                        }
-                        mdpTransitions.push_back(transitionsInBelief);*/
+                        }*/
                    }
                }

@ -766,7 +774,7 @@ namespace storm {
                mdpLabeling.addLabel("init");
                mdpLabeling.addLabel("target");
                mdpLabeling.addLabelToState("init", refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId));
-
+                mdpLabeling.addLabelToState("target", 1);
                uint_fast64_t currentRow = 0;
                uint_fast64_t currentRowGroup = 0;
                storm::storage::SparseMatrixBuilder<ValueType> smb(0, nextStateId, 0, false, true);
@ -801,6 +809,9 @@ namespace storm {
                            mdpLabeling.addLabelToState("target", state);
                            break;
                        }
+                        if (stoppedExplorationStateSet.find(state) != stoppedExplorationStateSet.end()) {
+                            break;
+                        }
                    }
                    ++currentRowGroup;
                }