From 7504f6f315a7d60dc2227425560cb26cdefc106d Mon Sep 17 00:00:00 2001
From: TimQu <tim.quatmann@cs.rwth-aachen.de>
Date: Tue, 28 Apr 2020 08:28:39 +0200
Subject: [PATCH] Improved statistics output for refinements, added detection
 of fixpoints

---
 .../ApproximatePOMDPModelchecker.cpp          | 108 ++++++++++++++----
 .../ApproximatePOMDPModelchecker.h            |   6 +-
 2 files changed, 90 insertions(+), 24 deletions(-)
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 03b2c2105..5ebd52685 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -294,11 +294,8 @@ namespace storm {
                 STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || !storm::utility::isZero(options.refinePrecision), "No termination criterion for refinement given. Consider to specify a steplimit, a non-zero precisionlimit, or a timeout");
                 STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined.");
                 while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps.get() < options.refineStepLimit.get()) && result.diff() > options.refinePrecision) {
-                    if (storm::utility::resources::isTerminate()) {
-                        break;
-                    }
-                    ++statistics.refinementSteps.get();
-                    
+                    bool overApproxFixPoint = true;
+                    bool underApproxFixPoint = true;
                     if (options.discretize) {
                         // Refine over-approximation
                         if (min) {
@@ -310,12 +307,12 @@ namespace storm {
                         overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(storm::utility::convertNumber<ValueType, uint64_t>(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor);
                         overApproxHeuristicPar.observationThreshold += options.obsThresholdIncrementFactor * (storm::utility::one<ValueType>() - overApproxHeuristicPar.observationThreshold);
                         overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
-                        buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation);
-                        if (overApproximation->hasComputedValues()) {
+                        overApproxFixPoint = buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation);
+                        if (overApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                             ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
                             bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
                             if (betterBound) {
-                                STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl);
+                                STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " in refinement step #" << (statistics.refinementSteps.get() + 1) << ". New value is '" << newValue << "'." << std::endl);
                             }
                         } else {
                             break;
@@ -327,17 +324,50 @@ namespace storm {
                         underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
                         underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(storm::utility::convertNumber<ValueType, uint64_t>(underApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor);
                         underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
-                        buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation);
-                        if (underApproximation->hasComputedValues()) {
+                        underApproxFixPoint = buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation);
+                        if (underApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                             ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
                             bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
                             if (betterBound) {
-                                STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl);
+                                STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " in refinement step #" << (statistics.refinementSteps.get() + 1) << ". New value is '" << newValue << "'." << std::endl);
                             }
                         } else {
                             break;
                         }
                     }
+                    
+                    if (storm::utility::resources::isTerminate()) {
+                        break;
+                    } else {
+                        ++statistics.refinementSteps.get();
+                        // Don't make too many outputs (to avoid logfile clutter)
+                        if (statistics.refinementSteps.get() <= 1000) {
+                            STORM_PRINT_AND_LOG("Completed iteration #" << statistics.refinementSteps.get() << ". Current checktime is " << statistics.totalTime << ".");
+                            bool computingLowerBound = false;
+                            bool computingUpperBound = false;
+                            if (options.discretize) {
+                                STORM_PRINT_AND_LOG(" Over-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
+                                (min ? computingLowerBound : computingUpperBound) = true;
+                            }
+                            if (options.unfold) {
+                                STORM_PRINT_AND_LOG(" Under-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
+                                (min ? computingUpperBound : computingLowerBound) = true;
+                            }
+                            if (computingLowerBound && computingUpperBound) {
+                                STORM_PRINT_AND_LOG(" Current result is [" << result.lowerBound << ", " << result.upperBound << "].");
+                            } else if (computingLowerBound) {
+                                STORM_PRINT_AND_LOG(" Current result is ≥" << result.lowerBound << ".");
+                            } else if (computingUpperBound) {
+                                STORM_PRINT_AND_LOG(" Current result is ≤" << result.upperBound << ".");
+                            }
+                            STORM_PRINT_AND_LOG(std::endl);
+                            STORM_LOG_WARN_COND(statistics.refinementSteps.get() == 1000, "Refinement requires  more than 1000 iterations.");
+                        }
+                    }
+                    if (overApproxFixPoint && underApproxFixPoint) {
+                        STORM_PRINT_AND_LOG("Refinement fixpoint reached after " << statistics.refinementSteps.get() << " iterations." << std::endl);
+                        break;
+                    }
                 }
             }
 
@@ -399,7 +429,10 @@ namespace storm {
             }
             
             template<typename PomdpModelType, typename BeliefValueType>
-            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
+            bool ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
+                
+                // Detect whether the refinement reached a fixpoint.
+                bool fixPoint = true;
                 
                 // current maximal resolution (needed for refinement heuristic)
                 uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
@@ -418,7 +451,10 @@ namespace storm {
                     overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true);
                     // We also need to find out which observation resolutions needs refinement.
                     auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution);
-                    // Potentially increase the observationThreshold so that at least one observation actually gets refinement.
+                    // If there is a score < 1, we have not reached a fixpoint, yet
+                    if (std::any_of(obsRatings.begin(), obsRatings.end(), [](ValueType const& value){return value < storm::utility::one<ValueType>();})) {
+                        fixPoint = false;
+                    }
                     refinedObservations = storm::utility::vector::filter<ValueType>(obsRatings, [&heuristicParameters](ValueType const& r) { return r <= heuristicParameters.observationThreshold;});
                     STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
                     for (auto const& obs : refinedObservations) {
@@ -447,10 +483,14 @@ namespace storm {
                     if (!timeLimitExceeded && options.explorationTimeLimit && static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) {
                         STORM_LOG_INFO("Exploration time limit exceeded.");
                         timeLimitExceeded = true;
+                        fixPoint = false;
                     }
 
                     uint64_t currId = overApproximation->exploreNextState();
-                    
+                    bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior();
+                    if (!hasOldBehavior) {
+                        fixPoint = false; // Exploring a new state!
+                    }
                     uint32_t currObservation = beliefManager->getBeliefObservation(currId);
                     if (targetObservations.count(currObservation) != 0) {
                         overApproximation->setCurrentStateIsTarget();
@@ -475,7 +515,7 @@ namespace storm {
                         bool restoreAllActions = false;
                         bool checkRewireForAllActions = false;
                         ValueType gap = storm::utility::abs<ValueType>(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState());
-                        if (!refine || !overApproximation->currentStateHasOldBehavior()) {
+                        if (!hasOldBehavior) {
                             // Case 1
                             // If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that reaches this state
                             if (!timeLimitExceeded && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
@@ -489,9 +529,19 @@ namespace storm {
                                 // Case 2
                                 if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                                     exploreAllActions = true; // Case 2.1
+                                    fixPoint = false;
                                 } else {
                                     truncateAllActions = true; // Case 2.2
                                     overApproximation->setCurrentStateIsTruncated();
+                                    if (fixPoint) {
+                                        // Properly check whether this can still be a fixpoint
+                                        if (overApproximation->currentStateIsOptimalSchedulerReachable()) {
+                                            fixPoint = false;
+                                        }
+                                        //} else {
+                                            // In this case we truncated a state that is not reachable under optimal schedulers.
+                                            // If no other state is explored (i.e. fixPoint remaints true), these states should still not be reachable in subsequent iterations
+                                    }
                                 }
                             } else {
                                 // Case 3
@@ -516,6 +566,7 @@ namespace storm {
                                 // First, check whether this action has been rewired since the last refinement of one of the successor observations (i.e. whether rewiring would actually change the successor states)
                                 assert(overApproximation->currentStateHasOldBehavior());
                                 if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) || overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) {
+                                    fixPoint = false;
                                     // Then, check whether the other criteria for rewiring are satisfied
                                     if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) {
                                         // Do the rewiring now! (Case 3.1)
@@ -581,9 +632,8 @@ namespace storm {
                         statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates();
                     }
                     statistics.overApproximationBuildTime.stop();
-                    return;
+                    return false;
                 }
-                statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates();
                 
                 overApproximation->finishExploration();
                 statistics.overApproximationBuildTime.stop();
@@ -591,12 +641,18 @@ namespace storm {
                 statistics.overApproximationCheckTime.start();
                 overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                 statistics.overApproximationCheckTime.stop();
+                
+                // don't overwrite statistics of a previous, successful computation
+                if (!storm::utility::resources::isTerminate() || !statistics.overApproximationStates) {
+                    statistics.overApproximationStates = overApproximation->getExploredMdp()->getNumberOfStates();
+                }
+                return fixPoint;
             }
 
             template<typename PomdpModelType, typename BeliefValueType>
-            void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
-                
+            bool ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
                 statistics.underApproximationBuildTime.start();
+                bool fixPoint = true;
                 if (heuristicParameters.sizeThreshold != std::numeric_limits<uint64_t>::max()) {
                     statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold;
                 }
@@ -626,12 +682,15 @@ namespace storm {
                     uint64_t currId = underApproximation->exploreNextState();
                     
                     uint32_t currObservation = beliefManager->getBeliefObservation(currId);
+                    bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated();
+                    if (!stateAlreadyExplored || timeLimitExceeded) {
+                        fixPoint = false;
+                    }
                     if (targetObservations.count(currObservation) != 0) {
                         underApproximation->setCurrentStateIsTarget();
                         underApproximation->addSelfloopTransition();
                     } else {
                         bool stopExploration = false;
-                        bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated();
                         if (timeLimitExceeded) {
                             stopExploration = true;
                             underApproximation->setCurrentStateIsTruncated();
@@ -691,9 +750,8 @@ namespace storm {
                         statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates();
                     }
                     statistics.underApproximationBuildTime.stop();
-                    return;
+                    return false;
                 }
-                statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates();
                 
                 underApproximation->finishExploration();
                 statistics.underApproximationBuildTime.stop();
@@ -701,6 +759,12 @@ namespace storm {
                 statistics.underApproximationCheckTime.start();
                 underApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                 statistics.underApproximationCheckTime.stop();
+                
+                // don't overwrite statistics of a previous, successful computation
+                if (!storm::utility::resources::isTerminate() || !statistics.underApproximationStates) {
+                    statistics.underApproximationStates = underApproximation->getExploredMdp()->getNumberOfStates();
+                }
+                return fixPoint;
 
             }
 
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 5a2cd683f..44127e011 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -74,13 +74,15 @@ namespace storm {
                 
                 /**
                  * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties
+                 * Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp)
                  */
-                void buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
+                bool buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
 
                 /**
                  * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties
+                 * Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp)
                  */
-                void buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
+                bool buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
 
                 ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution);