From 43220759f409ec5a9cd480b9b72c3df1360c8587 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Thu, 23 Apr 2020 11:59:43 +0200 Subject: [PATCH] Implemented a time limit for exploration. --- .../modules/BeliefExplorationSettings.cpp | 19 +++++++++- .../modules/BeliefExplorationSettings.h | 3 ++ .../ApproximatePOMDPModelCheckerOptions.h | 1 + .../ApproximatePOMDPModelchecker.cpp | 38 ++++++++++++++++--- 4 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp index c1c7e45bc..b05d97d79 100644 --- a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp @@ -20,6 +20,7 @@ namespace storm { const std::string BeliefExplorationSettings::moduleName = "belexpl"; const std::string refineOption = "refine"; + const std::string explorationTimeLimitOption = "exploration-time"; const std::string resolutionOption = "resolution"; const std::string sizeThresholdOption = "size-threshold"; const std::string gapThresholdOption = "gap-threshold"; @@ -31,6 +32,8 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, refineOption, false,"Refines the result bounds until reaching either the goal precision or the refinement step limit").addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("prec","The goal precision.").setDefaultValueDouble(1e-4).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0.0)).build()).addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("steps","The number of allowed refinement steps (0 means no limit).").setDefaultValueUnsignedInteger(0).makeOptional().build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, explorationTimeLimitOption, false, "Sets after which time no further states shall be explored.").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("time","In seconds.").build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, resolutionOption, false,"Sets the resolution of the discretization and how it is increased in case of refinement").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("init","the initial resolution (higher means more precise)").setDefaultValueUnsignedInteger(12).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the resolution of refined observations (higher means more precise).").setDefaultValueDouble(2).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterValidator(1)).build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, observationThresholdOption, false,"Only observations whose score is below this threshold will be refined.").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("init","initial threshold (higher means more precise").setDefaultValueDouble(0.1).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Controlls how fast the threshold is increased in each refinement step (higher means more precise).").setDefaultValueDouble(0.1).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); @@ -62,6 +65,14 @@ namespace storm { return this->getOption(refineOption).getArgumentByName("steps").getValueAsUnsignedInteger(); } + bool BeliefExplorationSettings::isExplorationTimeLimitSet() const { + return this->getOption(explorationTimeLimitOption).getHasOptionBeenSet(); + } + + uint64_t BeliefExplorationSettings::getExplorationTimeLimit() const { + return this->getOption(explorationTimeLimitOption).getArgumentByName("time").getValueAsUnsignedInteger(); + } + uint64_t BeliefExplorationSettings::getResolutionInit() const { return this->getOption(resolutionOption).getArgumentByName("init").getValueAsUnsignedInteger(); } @@ -116,8 +127,14 @@ namespace storm { options.refinePrecision = getRefinePrecision(); if (isRefineStepLimitSet()) { options.refineStepLimit = getRefineStepLimit(); + } else { + options.refineStepLimit = boost::none; + } + if (isExplorationTimeLimitSet()) { + options.explorationTimeLimit = getExplorationTimeLimit(); + } else { + options.explorationTimeLimit = boost::none; } - options.resolutionInit = getResolutionInit(); options.resolutionFactor = storm::utility::convertNumber(getResolutionFactor()); options.sizeThresholdInit = getSizeThresholdInit(); diff --git a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h index 0273a1945..5ae3a1bde 100644 --- a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h +++ b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h @@ -32,6 +32,9 @@ namespace storm { bool isRefineStepLimitSet() const; uint64_t getRefineStepLimit() const; + bool isExplorationTimeLimitSet() const; + uint64_t getExplorationTimeLimit() const; + /// Discretization Resolution uint64_t getResolutionInit() const; double getResolutionFactor() const; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h index 91cfc6db0..6d977a902 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h @@ -18,6 +18,7 @@ namespace storm { bool refine = false; boost::optional refineStepLimit; ValueType refinePrecision = storm::utility::zero(); + boost::optional explorationTimeLimit; // Controlparameters for the refinement heuristic // Discretization Resolution diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 86d24848d..31bbea7ce 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -215,8 +215,12 @@ namespace storm { heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; heuristicParameters.sizeThreshold = options.sizeThresholdInit; if (heuristicParameters.sizeThreshold == 0) { - // Select a decent value automatically - heuristicParameters.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); + if (options.explorationTimeLimit) { + heuristicParameters.sizeThreshold = std::numeric_limits::max(); + } else { + heuristicParameters.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); + STORM_PRINT_AND_LOG("Heuristically selected an under-approximation mdp size threshold of " << heuristicParameters.sizeThreshold << "." << std::endl); + } } buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, manager, approx); if (approx->hasComputedValues()) { @@ -419,9 +423,19 @@ namespace storm { statistics.overApproximationMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); // Start exploration + storm::utility::Stopwatch explorationTime; + if (options.explorationTimeLimit) { + explorationTime.start(); + } + bool timeLimitExceeded = false; std::map gatheredSuccessorObservations; // Declare here to avoid reallocations uint64_t numRewiredOrExploredStates = 0; while (overApproximation->hasUnexploredState()) { + if (!timeLimitExceeded && options.explorationTimeLimit && static_cast(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) { + STORM_LOG_INFO("Exploration time limit exceeded."); + timeLimitExceeded = true; + } + uint64_t currId = overApproximation->exploreNextState(); uint32_t currObservation = beliefManager->getBeliefObservation(currId); @@ -451,7 +465,7 @@ namespace storm { if (!refine || !overApproximation->currentStateHasOldBehavior()) { // Case 1 // If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that reaches this state - if (gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { + if (!timeLimitExceeded && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { exploreAllActions = true; // Case 1.1 } else { truncateAllActions = true; // Case 1.2 @@ -460,7 +474,7 @@ namespace storm { } else { if (overApproximation->getCurrentStateWasTruncated()) { // Case 2 - if (overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { + if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { exploreAllActions = true; // Case 2.1 } else { truncateAllActions = true; // Case 2.2 @@ -469,7 +483,7 @@ namespace storm { } else { // Case 3 // The decision for rewiring also depends on the corresponding action, but we have some criteria that lead to case 3.2 (independent of the action) - if (overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { + if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { checkRewireForAllActions = true; // Case 3.1 or Case 3.2 } else { restoreAllActions = true; // Definitely Case 3.2 @@ -580,7 +594,16 @@ namespace storm { // Expand the beliefs uint64_t newlyExploredStates = 0; + storm::utility::Stopwatch explorationTime; + if (options.explorationTimeLimit) { + explorationTime.start(); + } + bool timeLimitExceeded = false; while (underApproximation->hasUnexploredState()) { + if (!timeLimitExceeded && options.explorationTimeLimit && static_cast(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) { + STORM_LOG_INFO("Exploration time limit exceeded."); + timeLimitExceeded = true; + } uint64_t currId = underApproximation->exploreNextState(); uint32_t currObservation = beliefManager->getBeliefObservation(currId); @@ -590,7 +613,10 @@ namespace storm { } else { bool stopExploration = false; bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated(); - if (!stateAlreadyExplored) { + if (timeLimitExceeded) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); + } else if (!stateAlreadyExplored) { // Check whether we want to explore the state now! if (storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < heuristicParameters.gapThreshold) { stopExploration = true;