From 7504f6f315a7d60dc2227425560cb26cdefc106d Mon Sep 17 00:00:00 2001 From: TimQu Date: Tue, 28 Apr 2020 08:28:39 +0200 Subject: [PATCH] Improved statistics output for refinements, added detection of fixpoints --- .../ApproximatePOMDPModelchecker.cpp | 108 ++++++++++++++---- .../ApproximatePOMDPModelchecker.h | 6 +- 2 files changed, 90 insertions(+), 24 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 03b2c2105..5ebd52685 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -294,11 +294,8 @@ namespace storm { STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || !storm::utility::isZero(options.refinePrecision), "No termination criterion for refinement given. Consider to specify a steplimit, a non-zero precisionlimit, or a timeout"); STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined."); while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps.get() < options.refineStepLimit.get()) && result.diff() > options.refinePrecision) { - if (storm::utility::resources::isTerminate()) { - break; - } - ++statistics.refinementSteps.get(); - + bool overApproxFixPoint = true; + bool underApproxFixPoint = true; if (options.discretize) { // Refine over-approximation if (min) { @@ -310,12 +307,12 @@ namespace storm { overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor); overApproxHeuristicPar.observationThreshold += options.obsThresholdIncrementFactor * (storm::utility::one() - overApproxHeuristicPar.observationThreshold); overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; - buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); - if (overApproximation->hasComputedValues()) { + overApproxFixPoint = buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (overApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) { ValueType const& newValue = overApproximation->getComputedValueAtInitialState(); bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue); if (betterBound) { - STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl); + STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " in refinement step #" << (statistics.refinementSteps.get() + 1) << ". New value is '" << newValue << "'." << std::endl); } } else { break; @@ -327,17 +324,50 @@ namespace storm { underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor; underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(underApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor); underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; - buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); - if (underApproximation->hasComputedValues()) { + underApproxFixPoint = buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); + if (underApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) { ValueType const& newValue = underApproximation->getComputedValueAtInitialState(); bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue); if (betterBound) { - STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl); + STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " in refinement step #" << (statistics.refinementSteps.get() + 1) << ". New value is '" << newValue << "'." << std::endl); } } else { break; } } + + if (storm::utility::resources::isTerminate()) { + break; + } else { + ++statistics.refinementSteps.get(); + // Don't make too many outputs (to avoid logfile clutter) + if (statistics.refinementSteps.get() <= 1000) { + STORM_PRINT_AND_LOG("Completed iteration #" << statistics.refinementSteps.get() << ". Current checktime is " << statistics.totalTime << "."); + bool computingLowerBound = false; + bool computingUpperBound = false; + if (options.discretize) { + STORM_PRINT_AND_LOG(" Over-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << "."); + (min ? computingLowerBound : computingUpperBound) = true; + } + if (options.unfold) { + STORM_PRINT_AND_LOG(" Under-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << "."); + (min ? computingUpperBound : computingLowerBound) = true; + } + if (computingLowerBound && computingUpperBound) { + STORM_PRINT_AND_LOG(" Current result is [" << result.lowerBound << ", " << result.upperBound << "]."); + } else if (computingLowerBound) { + STORM_PRINT_AND_LOG(" Current result is ≥" << result.lowerBound << "."); + } else if (computingUpperBound) { + STORM_PRINT_AND_LOG(" Current result is ≤" << result.upperBound << "."); + } + STORM_PRINT_AND_LOG(std::endl); + STORM_LOG_WARN_COND(statistics.refinementSteps.get() == 1000, "Refinement requires more than 1000 iterations."); + } + } + if (overApproxFixPoint && underApproxFixPoint) { + STORM_PRINT_AND_LOG("Refinement fixpoint reached after " << statistics.refinementSteps.get() << " iterations." << std::endl); + break; + } } } @@ -399,7 +429,10 @@ namespace storm { } template - void ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + bool ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + + // Detect whether the refinement reached a fixpoint. + bool fixPoint = true; // current maximal resolution (needed for refinement heuristic) uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); @@ -418,7 +451,10 @@ namespace storm { overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true); // We also need to find out which observation resolutions needs refinement. auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution); - // Potentially increase the observationThreshold so that at least one observation actually gets refinement. + // If there is a score < 1, we have not reached a fixpoint, yet + if (std::any_of(obsRatings.begin(), obsRatings.end(), [](ValueType const& value){return value < storm::utility::one();})) { + fixPoint = false; + } refinedObservations = storm::utility::vector::filter(obsRatings, [&heuristicParameters](ValueType const& r) { return r <= heuristicParameters.observationThreshold;}); STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); for (auto const& obs : refinedObservations) { @@ -447,10 +483,14 @@ namespace storm { if (!timeLimitExceeded && options.explorationTimeLimit && static_cast(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) { STORM_LOG_INFO("Exploration time limit exceeded."); timeLimitExceeded = true; + fixPoint = false; } uint64_t currId = overApproximation->exploreNextState(); - + bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior(); + if (!hasOldBehavior) { + fixPoint = false; // Exploring a new state! + } uint32_t currObservation = beliefManager->getBeliefObservation(currId); if (targetObservations.count(currObservation) != 0) { overApproximation->setCurrentStateIsTarget(); @@ -475,7 +515,7 @@ namespace storm { bool restoreAllActions = false; bool checkRewireForAllActions = false; ValueType gap = storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()); - if (!refine || !overApproximation->currentStateHasOldBehavior()) { + if (!hasOldBehavior) { // Case 1 // If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that reaches this state if (!timeLimitExceeded && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { @@ -489,9 +529,19 @@ namespace storm { // Case 2 if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { exploreAllActions = true; // Case 2.1 + fixPoint = false; } else { truncateAllActions = true; // Case 2.2 overApproximation->setCurrentStateIsTruncated(); + if (fixPoint) { + // Properly check whether this can still be a fixpoint + if (overApproximation->currentStateIsOptimalSchedulerReachable()) { + fixPoint = false; + } + //} else { + // In this case we truncated a state that is not reachable under optimal schedulers. + // If no other state is explored (i.e. fixPoint remaints true), these states should still not be reachable in subsequent iterations + } } } else { // Case 3 @@ -516,6 +566,7 @@ namespace storm { // First, check whether this action has been rewired since the last refinement of one of the successor observations (i.e. whether rewiring would actually change the successor states) assert(overApproximation->currentStateHasOldBehavior()); if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) || overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) { + fixPoint = false; // Then, check whether the other criteria for rewiring are satisfied if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) { // Do the rewiring now! (Case 3.1) @@ -581,9 +632,8 @@ namespace storm { statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); } statistics.overApproximationBuildTime.stop(); - return; + return false; } - statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); overApproximation->finishExploration(); statistics.overApproximationBuildTime.stop(); @@ -591,12 +641,18 @@ namespace storm { statistics.overApproximationCheckTime.start(); overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.overApproximationCheckTime.stop(); + + // don't overwrite statistics of a previous, successful computation + if (!storm::utility::resources::isTerminate() || !statistics.overApproximationStates) { + statistics.overApproximationStates = overApproximation->getExploredMdp()->getNumberOfStates(); + } + return fixPoint; } template - void ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { - + bool ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { statistics.underApproximationBuildTime.start(); + bool fixPoint = true; if (heuristicParameters.sizeThreshold != std::numeric_limits::max()) { statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold; } @@ -626,12 +682,15 @@ namespace storm { uint64_t currId = underApproximation->exploreNextState(); uint32_t currObservation = beliefManager->getBeliefObservation(currId); + bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated(); + if (!stateAlreadyExplored || timeLimitExceeded) { + fixPoint = false; + } if (targetObservations.count(currObservation) != 0) { underApproximation->setCurrentStateIsTarget(); underApproximation->addSelfloopTransition(); } else { bool stopExploration = false; - bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated(); if (timeLimitExceeded) { stopExploration = true; underApproximation->setCurrentStateIsTruncated(); @@ -691,9 +750,8 @@ namespace storm { statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); } statistics.underApproximationBuildTime.stop(); - return; + return false; } - statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); underApproximation->finishExploration(); statistics.underApproximationBuildTime.stop(); @@ -701,6 +759,12 @@ namespace storm { statistics.underApproximationCheckTime.start(); underApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.underApproximationCheckTime.stop(); + + // don't overwrite statistics of a previous, successful computation + if (!storm::utility::resources::isTerminate() || !statistics.underApproximationStates) { + statistics.underApproximationStates = underApproximation->getExploredMdp()->getNumberOfStates(); + } + return fixPoint; } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 5a2cd683f..44127e011 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -74,13 +74,15 @@ namespace storm { /** * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties + * Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp) */ - void buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); + bool buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); /** * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties + * Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp) */ - void buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); + bool buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution);