From aca676a0a5dcfa2382672857af7bff0c5828304c Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Mon, 9 Dec 2019 09:48:09 +0100 Subject: [PATCH 1/3] Added model generation and checking for initial approximation bounds --- .../ApproximatePOMDPModelchecker.cpp | 116 ++++++++++++++---- .../ApproximatePOMDPModelchecker.h | 21 ++-- 2 files changed, 104 insertions(+), 33 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 37a29f9b1..27cbdb2bc 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -10,6 +10,7 @@ #include "storm/modelchecker/results/CheckResult.h" #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" #include "storm/api/properties.h" #include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" @@ -21,7 +22,7 @@ namespace storm { ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker() { precision = 0.000000001; cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(precision), false); - useMdp = false; + useMdp = true; maxIterations = 1000; } @@ -51,6 +52,51 @@ namespace storm { ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards) { + //TODO For the prototypical implementation, I put the refinement loop here. I'll change this later on + + // we define some positional scheduler for the POMDP as an experimental lower bound + storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); + for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { + auto obsStates = pomdp.getStatesWithObservation(obs); + // select a random action for all states with the same observation + uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front()); + for (auto const &state : obsStates) { + pomdpScheduler.setChoice(chosenAction, state); + } + } + + storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling()); + underlyingMdpLabeling.addLabel("goal"); + std::vector goalStates; + for (auto const &targetObs : targetObservations) { + for (auto const &goalState : pomdp.getStatesWithObservation(targetObs)) { + underlyingMdpLabeling.addLabelToState("goal", goalState); + } + } + storm::models::sparse::Mdp underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels()); + auto underlyingModel = std::static_pointer_cast>( + std::make_shared>(underlyingMdp)); + std::string initPropString = computeRewards ? "R" : "P"; + initPropString += min ? "min" : "max"; + initPropString += "=? 
[F \"goal\"]"; + std::vector propVector = storm::api::parseProperties(initPropString); + std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); + underlyingMdp.printModelInformationToStream(std::cout); + + std::unique_ptr underlyingRes( + storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); + STORM_LOG_ASSERT(underlyingRes, "Result not exist."); + underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); + auto mdpResultMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); + + auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false); + underApproxModel->printModelInformationToStream(std::cout); + std::unique_ptr underapproxRes( + storm::api::verifyWithSparseEngine(underApproxModel, storm::api::createTask(underlyingProperty, false))); + STORM_LOG_ASSERT(underapproxRes, "Result not exist."); + underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true))); + auto mdpUnderapproxResultMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); + STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) std::vector> beliefList; std::vector beliefIsTarget; @@ -70,6 +116,7 @@ namespace storm { // current ID -> action -> reward std::map> beliefActionRewards; + std::vector hintVector; uint64_t nextId = 0; storm::utility::Stopwatch expansionTimer(true); // Initial belief always has ID 0 @@ -107,6 +154,8 @@ namespace storm { initInserted = true; beliefGrid.push_back(initialBelief); beliefsToBeExpanded.push_back(0); + hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() + : storm::utility::zero()); } else { // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; @@ -116,6 +165,9 @@ namespace storm { beliefsToBeExpanded.push_back(nextId); ++nextId; + hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() + : storm::utility::zero()); + beliefStateMap[nextId] = mdpStateId; initTransitionInActionBelief[mdpStateId] = initLambdas[j]; ++nextId; @@ -131,9 +183,11 @@ namespace storm { mdpTransitions.push_back(initTransitionsInBelief); } - //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. 
Should do nothing special if belief is on grid, otherwise it gets interesting + std::map weightedSumOverMap; + std::map weightedSumUnderMap; + // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points while (!beliefsToBeExpanded.empty()) { uint64_t currId = beliefsToBeExpanded.front(); @@ -196,9 +250,22 @@ namespace storm { storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back( - targetObservations.find(observation) != targetObservations.end()); + // compute overapproximate value using MDP result map + auto tempWeightedSumOver = storm::utility::zero(); + auto tempWeightedSumUnder = storm::utility::zero(); + for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { + tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(mdpResultMap[i]); + tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(mdpUnderapproxResultMap[i]); + } + beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); + + if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) { + hintVector.push_back(tempWeightedSumOver); + } + beliefsToBeExpanded.push_back(nextId); + weightedSumOverMap[nextId] = tempWeightedSumOver; + weightedSumUnderMap[nextId] = tempWeightedSumUnder; beliefStateMap[nextId] = mdpStateId; transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; @@ -267,12 +334,15 @@ namespace storm { } overApproxMdp.printModelInformationToStream(std::cout); + /* storm::utility::Stopwatch overApproxTimer(true); auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, subSimplexCache, lambdaCache, result, chosenActions, gridResolution, min, computeRewards); - overApproxTimer.stop(); + overApproxTimer.stop();*/ + auto underApprox = storm::utility::zero(); + auto overApprox = storm::utility::one(); /* storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, @@ -282,10 +352,10 @@ namespace storm { STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl << "Time Underapproximation: " << underApproxTimer - << std::endl);*/ + << std::endl); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);*/ auto model = std::make_shared>(overApproxMdp); auto modelPtr = std::static_pointer_cast>(model); @@ -298,7 +368,13 @@ namespace storm { std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); + + auto task = storm::api::createTask(property, true); + auto hint = storm::modelchecker::ExplicitModelCheckerHint(); + hint.setResultHint(hintVector); + auto hintPtr = std::make_shared>(hint); + task.setHint(hintPtr); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); STORM_LOG_ASSERT(res, "Result not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); STORM_PRINT("OverApprox MDP: " << 
(res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); @@ -561,7 +637,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, - std::map> chosenActions, + std::map> &chosenActions, uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards, bool generateMdp) { std::set visitedBelieves; @@ -689,8 +765,7 @@ namespace storm { template storm::storage::SparseMatrix - ApproximatePOMDPModelchecker::buildTransitionMatrix( - std::vector>> transitions) { + ApproximatePOMDPModelchecker::buildTransitionMatrix(std::vector>> &transitions) { uint_fast64_t currentRow = 0; uint_fast64_t currentRowGroup = 0; uint64_t nrColumns = transitions.size(); @@ -809,7 +884,7 @@ namespace storm { template uint64_t ApproximatePOMDPModelchecker::getBeliefIdInVector( std::vector> const &grid, uint32_t observation, - std::vector probabilities) { + std::vector &probabilities) { // TODO This one is quite slow for (auto const &belief : grid) { if (belief.observation == observation) { @@ -929,7 +1004,7 @@ namespace storm { template std::pair>, std::vector> ApproximatePOMDPModelchecker::computeSubSimplexAndLambdas( - std::vector probabilities, uint64_t resolution) { + std::vector &probabilities, uint64_t resolution) { // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) // Variable names are based on the paper uint64_t probSize = probabilities.size(); @@ -989,7 +1064,7 @@ namespace storm { std::map ApproximatePOMDPModelchecker::computeObservationProbabilitiesAfterAction( storm::models::sparse::Pomdp const &pomdp, - storm::pomdp::Belief belief, + storm::pomdp::Belief &belief, uint64_t actionIndex) { std::map res; // the id is not important here as we immediately discard the belief (very hacky, I don't like it either) @@ -1009,10 +1084,8 @@ namespace storm { template storm::pomdp::Belief - ApproximatePOMDPModelchecker::getBeliefAfterAction( - storm::models::sparse::Pomdp const &pomdp, - storm::pomdp::Belief belief, - uint64_t actionIndex, uint64_t id) { + ApproximatePOMDPModelchecker::getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, + storm::pomdp::Belief &belief, uint64_t actionIndex, uint64_t id) { std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); uint32_t observation = 0; for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { @@ -1030,9 +1103,8 @@ namespace storm { template uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, - std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief belief, uint64_t actionIndex, - uint32_t observation, - uint64_t id) { + std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief &belief, uint64_t actionIndex, + uint32_t observation, uint64_t id) { storm::utility::Stopwatch distrWatch(true); std::vector distributionAfter(pomdp.getNumberOfStates()); //, storm::utility::zero()); for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { @@ -1075,7 +1147,7 @@ namespace storm { template ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, - uint64_t action, storm::pomdp::Belief belief) { + uint64_t action, storm::pomdp::Belief &belief) { auto result = storm::utility::zero(); for (size_t i = 0; i < belief.probabilities.size(); ++i) { result += belief.probabilities[i] * 
pomdp.getUniqueRewardModel().getTotalStateActionReward(i, action, pomdp.getTransitionMatrix()); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 6d8b50d43..aada759d2 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -141,7 +141,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, - std::map> chosenActions, + std::map> &chosenActions, uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward, bool generateMdp); /** @@ -161,7 +161,7 @@ namespace storm { * @return */ std::pair>, std::vector> - computeSubSimplexAndLambdas(std::vector probabilities, uint64_t gridResolution); + computeSubSimplexAndLambdas(std::vector &probabilities, uint64_t gridResolution); /** @@ -188,7 +188,7 @@ namespace storm { */ std::map computeObservationProbabilitiesAfterAction( storm::models::sparse::Pomdp const &pomdp, - storm::pomdp::Belief belief, + storm::pomdp::Belief &belief, uint64_t actionIndex); /** @@ -201,13 +201,12 @@ namespace storm { * @param observation the observation after the action was performed * @return the resulting belief (observation and distribution) */ - uint64_t - getBeliefAfterActionAndObservation( + uint64_t getBeliefAfterActionAndObservation( storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, - storm::pomdp::Belief belief, + storm::pomdp::Belief &belief, uint64_t actionIndex, uint32_t observation, uint64_t id); /** @@ -219,8 +218,8 @@ namespace storm { * @return */ storm::pomdp::Belief - getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, - storm::pomdp::Belief belief, uint64_t actionIndex, uint64_t id); + getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, storm::pomdp::Belief &belief, uint64_t actionIndex, + uint64_t id); /** * Helper to get the id of a Belief stored in a given vector structure @@ -230,15 +229,15 @@ namespace storm { * @return */ uint64_t getBeliefIdInVector(std::vector> const &grid, uint32_t observation, - std::vector probabilities); + std::vector &probabilities); /** * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability) * @return sparseMatrix representing the transitions */ - storm::storage::SparseMatrix buildTransitionMatrix(std::vector>> transitions); + storm::storage::SparseMatrix buildTransitionMatrix(std::vector>> &transitions); - ValueType getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, storm::pomdp::Belief belief); + ValueType getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, storm::pomdp::Belief &belief); ValueType overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, From f416cc82911c945dab0e54896e62daf030e36b66 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 10 Dec 2019 12:23:26 +0100 Subject: [PATCH 2/3] Added flag to toggle caching of subsimplex and lambda values --- .../ApproximatePOMDPModelchecker.cpp | 96 ++++++++++--------- .../ApproximatePOMDPModelchecker.h | 1 + 2 files changed, 54 insertions(+), 43 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 27cbdb2bc..3939913c6 100644 
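
The cacheSubsimplices flag introduced by this patch guards a per-belief memoization of the triangulation result, trading memory for repeated recomputation. Below is a minimal self-contained sketch of that guarded-cache pattern with plain doubles; Triangulation, getTriangulation and the compute callback are hypothetical names standing in for computeSubSimplexAndLambdas and the caches used in the patch.

#include <cstdint>
#include <functional>
#include <map>
#include <utility>
#include <vector>

using Triangulation = std::pair<std::vector<std::vector<double>>, std::vector<double>>;

// Sketch: return the (possibly cached) triangulation of a belief. With caching
// disabled, the triangulation is recomputed on every call and the cache stays empty.
Triangulation getTriangulation(uint64_t beliefId, std::vector<double> const& probabilities,
                               uint64_t resolution, bool cacheSubsimplices,
                               std::map<uint64_t, Triangulation>& cache,
                               std::function<Triangulation(std::vector<double> const&, uint64_t)> const& compute) {
    if (cacheSubsimplices) {
        auto it = cache.find(beliefId);
        if (it != cache.end()) {
            return it->second;
        }
    }
    Triangulation result = compute(probabilities, resolution);
    if (cacheSubsimplices) {
        cache[beliefId] = result;
    }
    return result;
}
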
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -24,6 +24,7 @@ namespace storm { cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(precision), false); useMdp = true; maxIterations = 1000; + cacheSubsimplices = false; } template @@ -81,6 +82,7 @@ namespace storm { initPropString += "=? [F \"goal\"]"; std::vector propVector = storm::api::parseProperties(initPropString); std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); + STORM_PRINT("Underlying MDP" << std::endl) underlyingMdp.printModelInformationToStream(std::cout); std::unique_ptr underlyingRes( @@ -90,6 +92,7 @@ namespace storm { auto mdpResultMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false); + STORM_PRINT("Random Positional Scheduler" << std::endl) underApproxModel->printModelInformationToStream(std::cout); std::unique_ptr underapproxRes( storm::api::verifyWithSparseEngine(underApproxModel, storm::api::createTask(underlyingProperty, false))); @@ -138,13 +141,15 @@ namespace storm { std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); std::vector> initSubSimplex = initTemp.first; std::vector initLambdas = initTemp.second; - subSimplexCache[0] = initSubSimplex; - lambdaCache[0] = initLambdas; - bool initInserted = false; + if(cacheSubsimplices){ + subSimplexCache[0] = initSubSimplex; + lambdaCache[0] = initLambdas; + } + std::vector> initTransitionsInBelief; std::map initTransitionInActionBelief; - + bool initInserted = false; for (size_t j = 0; j < initLambdas.size(); ++j) { if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]); @@ -183,13 +188,14 @@ namespace storm { mdpTransitions.push_back(initTransitionsInBelief); } - //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting + //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting std::map weightedSumOverMap; std::map weightedSumUnderMap; // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points while (!beliefsToBeExpanded.empty()) { + // TODO add direct generation of transition matrix uint64_t currId = beliefsToBeExpanded.front(); beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; @@ -230,8 +236,7 @@ namespace storm { //Triangulate here and put the possibly resulting belief in the grid std::vector> subSimplex; std::vector lambdas; - if (subSimplexCache.count(idNextBelief) > 0) { - // TODO is this necessary here? 
Think later + if (cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { subSimplex = subSimplexCache[idNextBelief]; lambdas = lambdaCache[idNextBelief]; } else { @@ -239,8 +244,10 @@ namespace storm { beliefList[idNextBelief].probabilities, gridResolution); subSimplex = temp.first; lambdas = temp.second; - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; + if(cacheSubsimplices){ + subSimplexCache[idNextBelief] = subSimplex; + lambdaCache[idNextBelief] = lambdas; + } } for (size_t j = 0; j < lambdas.size(); ++j) { @@ -307,6 +314,8 @@ namespace storm { STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) + //auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, subSimplexCache, lambdaCache,result, chosenActions, gridResolution, min, computeRewards); + storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); @@ -334,29 +343,6 @@ namespace storm { } overApproxMdp.printModelInformationToStream(std::cout); - /* - storm::utility::Stopwatch overApproxTimer(true); - auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, - subSimplexCache, lambdaCache, - result, chosenActions, gridResolution, min, computeRewards); - overApproxTimer.stop();*/ - - auto underApprox = storm::utility::zero(); - auto overApprox = storm::utility::one(); - /* - storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); - underApproxTimer.stop(); - - STORM_PRINT("Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl); - - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);*/ - auto model = std::make_shared>(overApproxMdp); auto modelPtr = std::static_pointer_cast>(model); std::vector parameterNames; @@ -367,17 +353,26 @@ namespace storm { propertyString += "=? 
[F \"target\"]"; std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - - auto task = storm::api::createTask(property, true); auto hint = storm::modelchecker::ExplicitModelCheckerHint(); hint.setResultHint(hintVector); auto hintPtr = std::make_shared>(hint); task.setHint(hintPtr); + storm::utility::Stopwatch overApproxTimer(true); std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); + overApproxTimer.stop(); STORM_LOG_ASSERT(res, "Result not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); + auto overApprox = res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; + /* storm::utility::Stopwatch underApproxTimer(true); + ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); + underApproxTimer.stop();*/ + + STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) + auto underApprox = storm::utility::zero(); + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); } @@ -425,7 +420,7 @@ namespace storm { // cache the values to not always re-calculate std::vector> subSimplex; std::vector lambdas; - if (subSimplexCache.count(nextBelief.id) > 0) { + if (cacheSubsimplices && subSimplexCache.count(nextBelief.id) > 0) { subSimplex = subSimplexCache[nextBelief.id]; lambdas = lambdaCache[nextBelief.id]; } else { @@ -433,8 +428,10 @@ namespace storm { gridResolution); subSimplex = temp.first; lambdas = temp.second; - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; + if(cacheSubsimplices) { + subSimplexCache[nextBelief.id] = subSimplex; + lambdaCache[nextBelief.id] = lambdas; + } } auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { @@ -477,11 +474,21 @@ namespace storm { STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); + std::vector initialLambda; + std::vector> initialSubsimplex; + if(cacheSubsimplices){ + initialLambda = lambdaCache[0]; + initialSubsimplex = subSimplexCache[0]; + } else { + auto temp = computeSubSimplexAndLambdas(beliefList[0].probabilities, gridResolution); + initialSubsimplex= temp.first; + initialLambda = temp.second; + } auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < lambdaCache[0].size(); ++j) { - if (lambdaCache[0][j] != storm::utility::zero()) { - overApprox += lambdaCache[0][j] * result_backup[getBeliefIdInVector(beliefGrid, beliefList[0].observation, subSimplexCache[0][j])]; + for (size_t j = 0; j < initialLambda.size(); ++j) { + if (initialLambda[j] != storm::utility::zero()) { + overApprox += initialLambda[j] * result_backup[getBeliefIdInVector(beliefGrid, beliefList[0].observation, initialSubsimplex[j])]; } } return overApprox; @@ -541,8 +548,10 @@ namespace storm { std::map> lambdaCache; std::pair>, std::vector> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); - subSimplexCache[0] = temp.first; - lambdaCache[0] = temp.second; + if(cacheSubsimplices) { + 
subSimplexCache[0] = temp.first; + lambdaCache[0] = temp.second; + } storm::utility::Stopwatch nextBeliefGeneration(true); for (size_t i = 0; i < beliefGrid.size(); ++i) { @@ -553,6 +562,7 @@ namespace storm { } else { result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); //TODO put this in extra function + // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index aada759d2..e6f935a72 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -253,6 +253,7 @@ namespace storm { storm::utility::ConstantsComparator cc; double precision; bool useMdp; + bool cacheSubsimplices; uint64_t maxIterations; }; From 94b93f013caf783756c88d09970f7b74ae48403f Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 13 Dec 2019 14:24:36 +0100 Subject: [PATCH 3/3] Added option to stop approximation space exploration early if difference between over- and under-approximation is under a given threshold --- .../settings/modules/POMDPSettings.cpp | 24 ++-- .../settings/modules/POMDPSettings.h | 2 + src/storm-pomdp-cli/storm-pomdp.cpp | 2 +- .../ApproximatePOMDPModelchecker.cpp | 115 ++++++++++++------ .../ApproximatePOMDPModelchecker.h | 4 +- 5 files changed, 95 insertions(+), 52 deletions(-) diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index 617cebf36..08f1e8d50 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -15,6 +15,7 @@ namespace storm { const std::string POMDPSettings::moduleName = "pomdp"; const std::string exportAsParametricModelOption = "parametric-drn"; const std::string gridApproximationOption = "gridapproximation"; + const std::string limitBeliefExplorationOption = "limit-exploration"; const std::string qualitativeReductionOption = "qualitativereduction"; const std::string analyzeUniqueObservationsOption = "uniqueobservations"; const std::string mecReductionOption = "mecreduction"; @@ -38,13 +39,9 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, fscmode, false, "Sets the way the pMC is obtained").addArgument(storm::settings::ArgumentBuilder::createStringArgument("type", "type name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(fscModes)).setDefaultValueString("standard").build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformBinaryOption, false, "Transforms the pomdp to a binary pomdp.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformSimpleOption, false, "Transforms the pomdp to a binary and simple pomdp.").build()); - this->addOption(storm::settings::OptionBuilder(moduleName, gridApproximationOption, false, - "Analyze the POMDP using grid approximation.").addArgument( - storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("resolution", - "the resolution of the grid").setDefaultValueUnsignedInteger( - 10).addValidatorUnsignedInteger( - storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator( - 
0)).build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, gridApproximationOption, false,"Analyze the POMDP using grid approximation.").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("resolution","the resolution of the grid").setDefaultValueUnsignedInteger(10).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, limitBeliefExplorationOption, false,"Sets whether to early in the belief space exploration if upper and lower bound are close").addArgument( + storm::settings::ArgumentBuilder::createDoubleArgument("threshold","the difference between upper and lower bound when to stop").setDefaultValueDouble(0.0).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); } bool POMDPSettings::isExportToParametricSet() const { @@ -76,10 +73,17 @@ namespace storm { } uint64_t POMDPSettings::getGridResolution() const { - return this->getOption(gridApproximationOption).getArgumentByName( - "resolution").getValueAsUnsignedInteger(); + return this->getOption(gridApproximationOption).getArgumentByName("resolution").getValueAsUnsignedInteger(); } - + + bool POMDPSettings::isLimitExplorationSet() const { + return this->getOption(limitBeliefExplorationOption).getHasOptionBeenSet(); + } + + double POMDPSettings::getExplorationThreshold() const { + return this->getOption(limitBeliefExplorationOption).getArgumentByName("threshold").getValueAsDouble(); + } + uint64_t POMDPSettings::getMemoryBound() const { return this->getOption(memoryBoundOption).getArgumentByName("bound").getValueAsUnsignedInteger(); } diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h index 1e68871e8..c0c9ecf44 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h @@ -27,6 +27,7 @@ namespace storm { bool isQualitativeReductionSet() const; bool isGridApproximationSet() const; + bool isLimitExplorationSet() const; bool isAnalyzeUniqueObservationsSet() const; bool isMecReductionSet() const; bool isSelfloopReductionSet() const; @@ -36,6 +37,7 @@ namespace storm { uint64_t getMemoryBound() const; uint64_t getGridResolution() const; + double getExplorationThreshold() const; storm::storage::PomdpMemoryPattern getMemoryPattern() const; bool check() const override; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 0af965ddd..8b28da0bf 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -203,7 +203,7 @@ int main(const int argc, const char** argv) { //result = checker.refineReachabilityProbability(*pomdp, targetObservationSet,probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, pomdpSettings.getGridResolution(),1,10); result = checker.computeReachabilityProbabilityOTF(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, - pomdpSettings.getGridResolution()); + pomdpSettings.getGridResolution(), pomdpSettings.getExplorationThreshold()); overRes = result->OverapproximationValue; underRes = result->UnderapproximationValue; if (overRes != underRes) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 3939913c6..fd8502563 
100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -52,20 +52,9 @@ namespace storm { std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution, - bool computeRewards) { + bool computeRewards, double explorationThreshold) { //TODO For the prototypical implementation, I put the refinement loop here. I'll change this later on - - // we define some positional scheduler for the POMDP as an experimental lower bound - storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); - for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { - auto obsStates = pomdp.getStatesWithObservation(obs); - // select a random action for all states with the same observation - uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front()); - for (auto const &state : obsStates) { - pomdpScheduler.setChoice(chosenAction, state); - } - } - + storm::utility::Stopwatch underlyingWatch(true); storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling()); underlyingMdpLabeling.addLabel("goal"); std::vector goalStates; @@ -84,13 +73,23 @@ namespace storm { std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); STORM_PRINT("Underlying MDP" << std::endl) underlyingMdp.printModelInformationToStream(std::cout); - - std::unique_ptr underlyingRes( - storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); + std::unique_ptr underlyingRes(storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); STORM_LOG_ASSERT(underlyingRes, "Result not exist."); underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); auto mdpResultMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); + underlyingWatch.stop(); + storm::utility::Stopwatch positionalWatch(true); + // we define some positional scheduler for the POMDP as an experimental lower bound + storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); + for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { + auto obsStates = pomdp.getStatesWithObservation(obs); + // select a random action for all states with the same observation + uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front()); + for (auto const &state : obsStates) { + pomdpScheduler.setChoice(chosenAction, state); + } + } auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false); STORM_PRINT("Random Positional Scheduler" << std::endl) underApproxModel->printModelInformationToStream(std::cout); @@ -99,6 +98,9 @@ namespace storm { STORM_LOG_ASSERT(underapproxRes, "Result not exist."); underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true))); auto mdpUnderapproxResultMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); + positionalWatch.stop(); + + STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl) STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) std::vector> beliefList; @@ -118,25 +120,30 @@ namespace storm { std::map>> nextBelieves; // current ID -> action -> reward std::map> 
beliefActionRewards; - - std::vector hintVector; uint64_t nextId = 0; storm::utility::Stopwatch expansionTimer(true); - // Initial belief always has ID 0 + // Initial belief always has belief ID 0 storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); ++nextId; beliefList.push_back(initialBelief); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - // These are the components to build the MDP from the grid + // These are the components to build the MDP from the grid TODO make a better stucture to allow for fast reverse lookups (state-->belief) as it is a bijective function (boost:bimap?) std::map beliefStateMap; - std::vector>> mdpTransitions; - std::vector targetStates; - uint64_t mdpStateId = 0; + // Reserve states 0 and 1 as always sink/goal states + std::vector>> mdpTransitions = {{{{0, storm::utility::one()}}},{{{1, storm::utility::one()}}}}; + // Hint vector for the MDP modelchecker (initialize with constant sink/goal values) + std::vector hintVector = {storm::utility::zero(), storm::utility::one()}; + std::vector targetStates = {1}; + uint64_t mdpStateId = 2; beliefStateMap[initialBelief.id] = mdpStateId; ++mdpStateId; + // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace + std::map weightedSumOverMap; + std::map weightedSumUnderMap; + // for the initial belief, add the triangulated initial states std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); std::vector> initSubSimplex = initTemp.first; @@ -145,8 +152,6 @@ namespace storm { subSimplexCache[0] = initSubSimplex; lambdaCache[0] = initLambdas; } - - std::vector> initTransitionsInBelief; std::map initTransitionInActionBelief; bool initInserted = false; @@ -156,6 +161,15 @@ namespace storm { if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) { if (searchResult == 0) { // the initial belief is on the grid itself + auto tempWeightedSumOver = storm::utility::zero(); + auto tempWeightedSumUnder = storm::utility::zero(); + for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) { + tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber(mdpResultMap[i]); + tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber(mdpUnderapproxResultMap[i]); + } + weightedSumOverMap[initialBelief.id] = tempWeightedSumOver; + weightedSumUnderMap[initialBelief.id] = tempWeightedSumUnder; + initInserted = true; beliefGrid.push_back(initialBelief); beliefsToBeExpanded.push_back(0); @@ -163,6 +177,17 @@ namespace storm { : storm::utility::zero()); } else { // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list + + auto tempWeightedSumOver = storm::utility::zero(); + auto tempWeightedSumUnder = storm::utility::zero(); + for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) { + tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber(mdpResultMap[i]); + tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber(mdpUnderapproxResultMap[i]); + } + + weightedSumOverMap[nextId] = tempWeightedSumOver; + weightedSumUnderMap[nextId] = tempWeightedSumUnder; + storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); @@ -190,16 +215,22 @@ namespace storm { //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we 
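
// Note on the cut-off used in the expansion loop below: the two states reserved
// above (index 0 as sink, index 1 as goal/target) act as the endpoints for
// beliefs that are not expanded. A belief whose precomputed upper and lower
// bound differ by less than the exploration threshold gets a single action that
// reaches the goal state with probability equal to its upper bound and the sink
// state with the remaining probability, so its value in the over-approximation
// MDP equals the precomputed upper bound and the belief space reachable from it
// is never explored. The threshold is only wired up for reachability
// probabilities; the reward variant passes a threshold of 0.
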
do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting - std::map weightedSumOverMap; - std::map weightedSumUnderMap; - - // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points + // Expand the beliefs to generate the grid on-the-fly + if(explorationThreshold > 0){ + STORM_PRINT("Exploration threshold: " << explorationThreshold << std::endl) + } while (!beliefsToBeExpanded.empty()) { // TODO add direct generation of transition matrix uint64_t currId = beliefsToBeExpanded.front(); beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; + if(cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(explorationThreshold))){ + result.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero() : weightedSumOverMap[currId])); + mdpTransitions.push_back({{{1, weightedSumOverMap[currId]},{0, storm::utility::one() - weightedSumOverMap[currId]}}}); + continue; + } + if (isTarget) { // Depending on whether we compute rewards, we select the right initial result result.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero() : storm::utility::one())); @@ -240,8 +271,7 @@ namespace storm { subSimplex = subSimplexCache[idNextBelief]; lambdas = lambdaCache[idNextBelief]; } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - beliefList[idNextBelief].probabilities, gridResolution); + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(beliefList[idNextBelief].probabilities, gridResolution); subSimplex = temp.first; lambdas = temp.second; if(cacheSubsimplices){ @@ -268,6 +298,8 @@ namespace storm { if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) { hintVector.push_back(tempWeightedSumOver); + } else { + hintVector.push_back(storm::utility::zero()); } beliefsToBeExpanded.push_back(nextId); @@ -319,11 +351,10 @@ namespace storm { storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", 0); + mdpLabeling.addLabelToState("init", beliefStateMap[initialBelief.id]); for (auto targetState : targetStates) { mdpLabeling.addLabelToState("target", targetState); } - storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); storm::models::sparse::Mdp overApproxMdp(modelComponents); if (computeRewards) { @@ -353,7 +384,7 @@ namespace storm { propertyString += "=? 
[F \"target\"]"; std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - auto task = storm::api::createTask(property, true); + auto task = storm::api::createTask(property, false); auto hint = storm::modelchecker::ExplicitModelCheckerHint(); hint.setResultHint(hintVector); auto hintPtr = std::make_shared>(hint); @@ -362,8 +393,9 @@ namespace storm { std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); overApproxTimer.stop(); STORM_LOG_ASSERT(res, "Result not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - auto overApprox = res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); + auto resultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); + auto overApprox = resultMap[beliefStateMap[initialBelief.id]]; /* storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); @@ -374,6 +406,11 @@ namespace storm { STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + std::map differences; + for(auto const &entry : weightedSumUnderMap){ + differences[beliefStateMap[entry.first]] = resultMap[beliefStateMap[entry.first]] - weightedSumUnderMap[entry.first]; + } + return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); } @@ -499,15 +536,15 @@ namespace storm { ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution) { - return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, true); + return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, true, 0); } template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, - uint64_t gridResolution) { - return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, false); + uint64_t gridResolution, double explorationThreshold) { + return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, false, explorationThreshold); } template diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index e6f935a72..438354eca 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -28,7 +28,7 @@ namespace storm { std::unique_ptr> computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, - uint64_t gridResolution); + uint64_t gridResolution, double explorationThreshold); std::unique_ptr> computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, @@ -57,7 +57,7 @@ namespace storm { std::unique_ptr> computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, - uint64_t 
gridResolution, bool computeRewards); + uint64_t gridResolution, bool computeRewards, double explorationThreshold); /** *