From a5fc9fc5b75a8f802d7cdd169f99741d73750396 Mon Sep 17 00:00:00 2001
From: Alexander Bork <alexander.bork@rwth-aachen.de>
Date: Fri, 24 Jan 2020 11:05:25 +0100
Subject: [PATCH] Added naive underapproximation by unrolling the belief
 support up to a given size

---
 .../ApproximatePOMDPModelchecker.cpp          | 311 ++++--------------
 .../ApproximatePOMDPModelchecker.h            |  17 +-
 2 files changed, 66 insertions(+), 262 deletions(-)
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 4144bf91c..84c055d1f 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -88,9 +88,10 @@ namespace storm {
                 std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), gridResolution);
                 auto overRes = storm::utility::one<ValueType>();
                 auto underRes = storm::utility::zero<ValueType>();
+                uint64_t underApproxModelSize = 50;
                 uint64_t refinementCounter = 1;
                 std::unique_ptr<POMDPCheckResult<ValueType>> res = computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold,
-                                                                                          overApproxMap, underApproxMap);
+                                                                                          overApproxMap, underApproxMap, underApproxModelSize);
                 // TODO the actual refinement
                 return res;
             }
@@ -102,10 +103,11 @@ namespace storm {
                                                                                              std::vector<uint64_t> &observationResolutionVector,
                                                                                              bool computeRewards, double explorationThreshold,
                                                                                              boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-                                                                                             boost::optional<std::map<uint64_t, ValueType>> underApproximationMap) {
+                                                                                             boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
+                                                                                             uint64_t maxUaModelSize) {
                 STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
                 auto result = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, computeRewards, explorationThreshold, overApproximationMap,
-                                                         underApproximationMap);
+                                                         underApproximationMap, maxUaModelSize);
                 return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->overApproxValue, result->underApproxValue});
             }
 
@@ -117,7 +119,8 @@ namespace storm {
                                                                                                  std::vector<uint64_t> &observationResolutionVector,
                                                                                                  bool computeRewards, double explorationThreshold,
                                                                                                  boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-                                                                                                 boost::optional<std::map<uint64_t, ValueType>> underApproximationMap) {
+                                                                                                 boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
+                                                                                                 uint64_t maxUaModelSize) {
                 bool boundMapsSet = overApproximationMap && underApproximationMap;
                 std::map<uint64_t, ValueType> overMap;
                 std::map<uint64_t, ValueType> underMap;
@@ -413,183 +416,8 @@ namespace storm {
                 auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)];
 
                 STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl)
-
-                // Prototypical implementation of the underapproximation - WRONG
-                /*
-                // The map has the following form: (beliefId, action) --> stateId
-                uamap_type uaStateMap;
-                // Reserve states 0 and 1 as always sink/goal states
-                std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> uaTransitions = {{0,{{{0, storm::utility::one<ValueType>()}}}},
-                                                                                                {1,{{{1, storm::utility::one<ValueType>()}}}}};
-                // Hint vector for the MDP modelchecker (initialize with constant sink/goal values)
-                std::vector<uint64_t> uaTargetStates = {1};
-                uint64_t uaStateId = 2;
-
-                // for beliefs which are both in the actual belief support and the grid, we use the max value for the action to indicate the support belief
-                uaStateMap.insert(uamap_type::value_type(std::make_pair(initialBelief.id, std::numeric_limits<uint64_t>::max()), uaStateId));
-                ++uaStateId;
-                beliefsToBeExpanded.push_back(initialBelief.id);
-                while(!beliefsToBeExpanded.empty()){
-                    uint64_t currId = beliefsToBeExpanded.front();
-                    beliefsToBeExpanded.pop_front();
-                    bool isTarget = beliefIsTarget[currId];
-
-                    if(isTarget){
-                        // For target states we add a self-loop
-                        uaTargetStates.push_back(uaStateMap.left.at(std::make_pair(currId, std::numeric_limits<uint64_t>::max())));
-                        uaTransitions[uaStateMap.left.at(std::make_pair(currId, std::numeric_limits<uint64_t>::max()))] = {{{uaStateMap.left.at(std::make_pair(currId, std::numeric_limits<uint64_t>::max())), storm::utility::one<ValueType>()}}};
-                    } else {
-                        uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front());
-                        //Triangulate the current belief to determine its approximation bases
-                        std::vector<std::vector<ValueType>> subSimplex;
-                        std::vector<ValueType> lambdas;
-                        if (cacheSubsimplices && subSimplexCache.count(currId) > 0) {
-                            subSimplex = subSimplexCache[currId];
-                            lambdas = lambdaCache[currId];
-                        } else {
-                            std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
-                                    beliefList[currId].probabilities, observationResolutionVector[beliefList[currId].observation]);
-                            subSimplex = temp.first;
-                            lambdas = temp.second;
-                            if(cacheSubsimplices){
-                                subSimplexCache[currId] = subSimplex;
-                                lambdaCache[currId] = lambdas;
-                            }
-                        }
-                        std::deque<std::pair<uint64_t,uint64_t>> approxToExpand;
-                        std::vector<std::map<uint64_t, ValueType>> approxActionTransitions(numChoices);
-                        for (size_t j = 0; j < lambdas.size(); ++j) {
-                            if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())){
-                                uint64_t approxId = getBeliefIdInVector(beliefGrid, beliefList[currId].observation, subSimplex[j]);
-                                //STORM_PRINT("ApproxId " << approxId << std::endl)
-                                if (approxId == uint64_t(-1)) {
-                                    // If the approximation base is not yet in the grid, we add it and it has to be expanded
-                                    storm::pomdp::Belief<ValueType> gridBelief = {nextId, beliefList[currId].observation, subSimplex[j]};
-                                    beliefList.push_back(gridBelief);
-                                    beliefGrid.push_back(gridBelief);
-                                    beliefIsTarget.push_back(targetObservations.find(beliefList[currId].observation) != targetObservations.end());
-                                    for(uint64_t action=0; action < numChoices; ++action) {
-                                        approxToExpand.push_back(std::make_pair(nextId, action));
-                                        uaStateMap.insert(uamap_type::value_type(std::make_pair(nextId, action), uaStateId));
-                                        approxActionTransitions[action][uaStateId] = lambdas[j];
-                                        ++uaStateId;
-                                    }
-                                    ++nextId;
-                                } else if(uaStateMap.left.find(std::pair<uint64_t,uint64_t>(approxId,0)) != uaStateMap.left.end()){
-                                    // we can check only for (approxId,0) as that always exists if the grid state is mapped
-                                    for(uint64_t action=0; action < numChoices; ++action) {
-                                        approxActionTransitions[action][uaStateMap.left.at(std::make_pair(approxId,action))] = lambdas[j];
-                                    }
-                                } else {
-                                    for(uint64_t action=0; action < numChoices; ++action) {
-                                        approxToExpand.push_back(std::make_pair(approxId, action));
-                                        uaStateMap.insert(uamap_type::value_type(std::make_pair(approxId, action), uaStateId));
-                                        approxActionTransitions[action][uaStateId] = lambdas[j];
-                                        ++uaStateId;
-                                    }
-                                }
-                            }
-                        }
-                        uaTransitions[uaStateMap.left.at(std::make_pair(currId,std::numeric_limits<uint64_t>::max()))] = approxActionTransitions;
-                        // Now expand all approximation bases
-                        while(!approxToExpand.empty()){
-                            uint64_t approxId = approxToExpand.front().first;
-                            uint64_t approxAction = approxToExpand.front().second;
-                            approxToExpand.pop_front();
-
-                            // Iterate over all actions and determine the successor states
-                            std::map<uint64_t, ValueType> transitionsInAction;
-                            std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[approxId], approxAction);
-                            for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
-                                uint32_t observation = iter->first;
-                                uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[approxId], approxAction,
-                                                                                           observation, nextId);
-                                nextId = beliefList.size();
-                                if(uaStateMap.left.find(std::make_pair(idNextBelief, std::numeric_limits<uint64_t>::max())) == uaStateMap.left.end()){
-                                    // add state to the mapping and set it t be expanded
-                                    uaStateMap.insert(uamap_type::value_type(std::make_pair(idNextBelief,std::numeric_limits<uint64_t>::max()), uaStateId));
-                                    ++uaStateId;
-                                    beliefsToBeExpanded.push_back(idNextBelief);
-                                }
-                                transitionsInAction[uaStateMap.left.at(std::make_pair(idNextBelief,std::numeric_limits<uint64_t>::max()))] = iter->second;
-                            }
-                            uaTransitions[uaStateMap.left.at(std::make_pair(approxId,approxAction))] = {transitionsInAction};
-                        }
-                    }
-                }
-
-
-                std::vector<std::vector<std::map<uint64_t, ValueType>>> uaTransitionVector;
-                for(auto iter = uaTransitions.begin(); iter != uaTransitions.end(); ++iter){
-                    uaTransitionVector.push_back(iter->second);
-                }
-                STORM_PRINT(uaTransitions.size() << std::endl)
-
-                storm::models::sparse::StateLabeling uaLabeling(uaTransitions.size());
-                uaLabeling.addLabel("init");
-                uaLabeling.addLabel("target");
-                uaLabeling.addLabel("belief");
-                uaLabeling.addLabel("grid");
-                uaLabeling.addLabelToState("init", uaStateMap.left.at(std::make_pair(initialBelief.id,std::numeric_limits<uint64_t>::max())));
-                for (auto targetState : uaTargetStates) {
-                    uaLabeling.addLabelToState("target", targetState);
-                }
-                for (auto &iter : uaStateMap.right) {
-                    std::stringstream mapEntryStr;
-                    mapEntryStr << std::to_string(iter.first);
-                    mapEntryStr << " --> ";
-                    mapEntryStr << "[{" + std::to_string(beliefList[iter.second.first].observation) << "} | " ;
-                    for(uint64_t state = 0; state < beliefList[iter.second.first].probabilities.size(); ++state){
-                        if(beliefList[iter.second.first].probabilities[state] > storm::utility::zero<ValueType>()){
-                            mapEntryStr << std::to_string(state) << " : " << beliefList[iter.second.first].probabilities[state] << ", ";
-                        }
-                    }
-                    mapEntryStr << "]" << std::endl;
-                    STORM_PRINT(mapEntryStr.str());
-                    if(!uaLabeling.containsLabel(mapEntryStr.str())){
-                        uaLabeling.addLabel(mapEntryStr.str());
-                    }
-                    if(iter.second.second == std::numeric_limits<uint64_t>::max()){
-                        uaLabeling.addLabelToState("belief", iter.first);
-                    } else {
-                        uaLabeling.addLabelToState("grid", iter.first);
-                    }
-                    uaLabeling.addLabelToState(mapEntryStr.str(), iter.first);
-                }
-
-                //STORM_PRINT(buildTransitionMatrix(uaTransitionVector))
-                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> uaModelComponents(buildTransitionMatrix(uaTransitionVector), uaLabeling);
-                storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(uaModelComponents);
-                if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> uaMdpRewardModel(boost::none, std::vector<ValueType>(uaModelComponents.transitionMatrix.getRowCount()));
-                    for (auto const &iter : uaStateMap.left) {
-                        auto currentBelief = beliefList[iter.first.first];
-                        auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
-                        for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) {
-                            // Add the reward
-                            uaMdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
-                                                                getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
-                                                                                     currentBelief));
-                        }
-                    }
-                    underApproxMdp.addRewardModel("std", uaMdpRewardModel);
-                    underApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
-                }
-                underApproxMdp.printModelInformationToStream(std::cout);
-
-                auto uaModel = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underApproxMdp);
-                auto uaModelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(uaModel);
-                storm::api::exportSparseModelAsDot(uaModelPtr, "ua_model.dot");
-                auto uaTask = storm::api::createTask<ValueType>(property, false);
-                storm::utility::Stopwatch underApproxTimer(true);
-                std::unique_ptr<storm::modelchecker::CheckResult> uaRes(storm::api::verifyWithSparseEngine<ValueType>(uaModelPtr, uaTask));
-                underApproxTimer.stop();
-                STORM_LOG_ASSERT(uaRes, "Result not exist.");
-                uaRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true)));
-                auto underApproxResultMap = uaRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                auto underApprox = underApproxResultMap[uaStateMap.left.at(std::make_pair(initialBelief.id, std::numeric_limits<uint64_t>::max()))];
-                */
-                auto underApprox = weightedSumUnderMap[initialBelief.id];
+                //auto underApprox = weightedSumUnderMap[initialBelief.id];
+                auto underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize);
                 STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
                 STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
 
@@ -837,10 +665,10 @@ namespace storm {
 
                 // Now onto the under-approximation
                 storm::utility::Stopwatch underApproxTimer(true);
-                ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
-                                                                  result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp);
+                /*ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
+                                                                  result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp);*/
                 underApproxTimer.stop();
-
+                auto underApprox = storm::utility::zero<ValueType>();
                 STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl
                                                             << "Time Overapproximation: " << overApproxTimer
                                                             << std::endl
@@ -874,64 +702,60 @@ namespace storm {
                                                                                                 std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
                                                                                                 std::vector<bool> &beliefIsTarget,
                                                                                                 std::set<uint32_t> const &targetObservations,
-                                                                                                std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
-                                                                                                std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
-                                                                                                std::map<uint64_t, ValueType> &result,
-                                                                                                std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
-                                                                                                uint64_t gridResolution, uint64_t initialBeliefId, bool min,
-                                                                                                bool computeRewards, bool generateMdp) {
+                                                                                                uint64_t initialBeliefId, bool min,
+                                                                                                bool computeRewards, uint64_t maxModelSize) {
                 std::set<uint64_t> visitedBelieves;
                 std::deque<uint64_t> believesToBeExpanded;
                 std::map<uint64_t, uint64_t> beliefStateMap;
-                std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions;
-                std::vector<uint64_t> targetStates;
+                std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions = {{{{0, storm::utility::one<ValueType>()}}},
+                                                                                       {{{1, storm::utility::one<ValueType>()}}}};
+                std::vector<uint64_t> targetStates = {1};
 
-                uint64_t stateId = 0;
+                uint64_t stateId = 2;
                 beliefStateMap[initialBeliefId] = stateId;
                 ++stateId;
+                uint64_t nextId = beliefList.size();
+                uint64_t counter = 0;
 
                 // Expand the believes
                 visitedBelieves.insert(initialBeliefId);
                 believesToBeExpanded.push_back(initialBeliefId);
                 while (!believesToBeExpanded.empty()) {
+                    //TODO think of other ways to stop exploration besides model size
                     auto currentBeliefId = believesToBeExpanded.front();
-                    std::vector<std::map<uint64_t, ValueType>> actionTransitionStorage;
+                    uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front());
                     // for targets, we only consider one action with one transition
                     if (beliefIsTarget[currentBeliefId]) {
-                        // add a self-loop to target states and save them
-                        std::map<uint64_t, ValueType> transitionsInStateWithAction;
-                        transitionsInStateWithAction[beliefStateMap[currentBeliefId]] = storm::utility::one<ValueType>();
+                        // add a self-loop to target states 
                         targetStates.push_back(beliefStateMap[currentBeliefId]);
-                        actionTransitionStorage.push_back(transitionsInStateWithAction);
+                        transitions.push_back({{{beliefStateMap[currentBeliefId], storm::utility::one<ValueType>()}}});
+                    } else if (counter > maxModelSize) {
+                        transitions.push_back({{{0, storm::utility::one<ValueType>()}}});
                     } else {
-                        if (chosenActions.find(currentBeliefId) == chosenActions.end()) {
-                            chosenActions[currentBeliefId] = generateMdp ? extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations,
-                                                                                              observationProbabilities,
-                                                                                              nextBelieves, result, gridResolution,
-                                                                                              currentBeliefId, beliefList.size(), min) :
-                                                             extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations,
-                                                                               observationProbabilities,
-                                                                               nextBelieves, result, gridResolution,
-                                                                               currentBeliefId, beliefList.size(), min);
-                        }
                         // Iterate over all actions and add the corresponding transitions
-                        for (auto const &action : chosenActions[currentBeliefId]) {
+                        std::vector<std::map<uint64_t, ValueType>> actionTransitionStorage;
+                        //TODO add a way to extract the actions from the over-approx and use them here?
+                        for (uint64_t action = 0; action < numChoices; ++action) {
                             std::map<uint64_t, ValueType> transitionsInStateWithAction;
-
-                            for (auto iter = observationProbabilities[currentBeliefId][action].begin(); iter != observationProbabilities[currentBeliefId][action].end(); ++iter) {
+                            std::map<uint32_t, ValueType> observationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currentBeliefId], action);
+                            for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) {
                                 uint32_t observation = iter->first;
-                                uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation];
+                                uint64_t nextBeliefId = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId],
+                                                                                           action,
+                                                                                           observation, nextId);
+                                nextId = beliefList.size();
                                 if (visitedBelieves.insert(nextBeliefId).second) {
                                     beliefStateMap[nextBeliefId] = stateId;
                                     ++stateId;
                                     believesToBeExpanded.push_back(nextBeliefId);
+                                    ++counter;
                                 }
                                 transitionsInStateWithAction[beliefStateMap[nextBeliefId]] = iter->second;
                             }
                             actionTransitionStorage.push_back(transitionsInStateWithAction);
                         }
+                        transitions.push_back(actionTransitionStorage);
                     }
-                    transitions.push_back(actionTransitionStorage);
                     believesToBeExpanded.pop_front();
                 }
 
@@ -949,44 +773,28 @@ namespace storm {
                     transitionMatrix.makeRowGroupingTrivial();
                 }
                 storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(transitionMatrix, labeling);
-                if (transitionMatrix.hasTrivialRowGrouping()) {
-
-                    storm::models::sparse::Dtmc<ValueType, RewardModelType> underApproxMc(modelComponents);
-                    storm::models::sparse::StandardRewardModel<ValueType> rewardModel(std::vector<ValueType>(beliefStateMap.size()));
-                    if (computeRewards) {
-                        for (auto const &iter : beliefStateMap) {
-                            auto currentBelief = beliefList[iter.first];
-                            // Add the reward collected by taking the chosen Action in the belief
-                            rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex(
-                                    storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])),
-                                                                                         currentBelief));
-                        }
-                    }
-                    underApproxMc.addRewardModel("std", rewardModel);
-                    underApproxMc.restrictRewardModels(std::set<std::string>({"std"}));
-
-                    model = std::make_shared<storm::models::sparse::Dtmc<ValueType, RewardModelType>>(underApproxMc);
-                } else {
-                    storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(modelComponents);
-                    if (computeRewards) {
-                        storm::models::sparse::StandardRewardModel<ValueType> rewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
-                        for (auto const &iter : beliefStateMap) {
-                            auto currentBelief = beliefList[iter.first];
-                            auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
-                            for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) {
-                                // Add the reward
-                                rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
-                                                                 getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
-                                                                                      currentBelief));
-                            }
+                storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(modelComponents);
+                if (computeRewards) {
+                    storm::models::sparse::StandardRewardModel<ValueType> rewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
+                    for (auto const &iter : beliefStateMap) {
+                        auto currentBelief = beliefList[iter.first];
+                        auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
+                        for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) {
+                            // Add the reward
+                            rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
+                                                             getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
+                                                                                  currentBelief));
                         }
-                        underApproxMdp.addRewardModel("std", rewardModel);
-                        underApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
                     }
-                    model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underApproxMdp);
+                    underApproxMdp.addRewardModel("std", rewardModel);
+                    underApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
                 }
+                model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underApproxMdp);
+
                 model->printModelInformationToStream(std::cout);
 
+                storm::api::exportSparseModelAsDot(model, "ua_model.dot");
+
                 std::string propertyString;
                 if (computeRewards) {
                     propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
@@ -996,10 +804,13 @@ namespace storm {
                 std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
                 std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
 
-                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, true)));
+                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, false)));
                 STORM_LOG_ASSERT(res, "Result does not exist.");
-                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates()));
-                return res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap().begin()->second;
+                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true)));
+                auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
+                auto underApprox = underApproxResultMap[beliefStateMap[initialBeliefId]];
+
+                return underApprox;
             }
 
 
@@ -1406,4 +1217,4 @@ namespace storm {
 #endif
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index df3e72af9..867f9d673 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -80,7 +80,7 @@ namespace storm {
                 computeFirstRefinementStep(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                            std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
                                            bool computeRewards, double explorationThreshold, boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-                                           boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none);
+                                           boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
 
                 /**
                  *
@@ -96,7 +96,7 @@ namespace storm {
                                        std::set<uint32_t> const &targetObservations, bool min,
                                        std::vector<uint64_t> &observationResolutionVector, bool computeRewards, double explorationThreshold,
                                        boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-                                       boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none);
+                                       boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
 
                 /**
                  *
@@ -164,24 +164,17 @@ namespace storm {
                  * @param beliefList
                  * @param beliefIsTarget
                  * @param targetObservations
-                 * @param observationProbabilities
-                 * @param nextBelieves
-                 * @param result
-                 * @param chosenActions
-                 * @param gridResolution
                  * @param initialBeliefId
                  * @param min
+                 * @param computeReward
+                 * @param maxModelSize
                  * @return
                  */
                 ValueType computeUnderapproximation(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                     std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
                                                     std::vector<bool> &beliefIsTarget,
                                                     std::set<uint32_t> const &targetObservations,
-                                                    std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
-                                                    std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
-                                                    std::map<uint64_t, ValueType> &result,
-                                                    std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
-                                                    uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward, bool generateMdp);
+                                                    uint64_t initialBeliefId, bool min, bool computeReward, uint64_t maxModelSize);
 
                 /**
                  *