
Implemented 'guessing' of initial POMDP schedulers, producing multiple guesses

tempestpy_adaptions
Tim Quatmann, 5 years ago
Commit 2f2a007896
1. src/storm-pomdp/builder/BeliefMdpExplorer.h (26 changed lines)
2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (19 changed lines)
3. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (7 changed lines)
4. src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h (202 changed lines)

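In short, the commit replaces the single pair of lower/upper bound vectors by a struct holding several candidate vectors, one per guessed scheduler, so that per-state queries can pick the tightest bound. A minimal self-contained sketch of that idea (simplified types, not the Storm API):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Simplified stand-in for storm::pomdp::modelchecker::TrivialPomdpValueBounds.
    struct Bounds {
        std::vector<std::vector<double>> lower;  // one value vector per guessed scheduler
        std::vector<std::vector<double>> upper;

        // Tightest lower bound for a state: the maximum over all guesses.
        double highestLowerBound(std::uint64_t state) const {
            assert(!lower.empty());
            double result = lower.front()[state];
            for (auto const& vec : lower) {
                result = std::max(result, vec[state]);
            }
            return result;
        }

        // Tightest upper bound for a state: the minimum over all guesses.
        double smallestUpperBound(std::uint64_t state) const {
            assert(!upper.empty());
            double result = upper.front()[state];
            for (auto const& vec : upper) {
                result = std::min(result, vec[state]);
            }
            return result;
        }
    };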
src/storm-pomdp/builder/BeliefMdpExplorer.h (26 changed lines)

@@ -14,6 +14,7 @@
#include "storm/storage/SparseMatrix.h"
#include "storm/utility/macros.h"
#include "storm-pomdp/storage/BeliefManager.h"
#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"
#include "storm/utility/SignalHandler.h"
#include "storm/modelchecker/results/CheckResult.h"
#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
@@ -38,7 +39,7 @@ namespace storm {
ModelChecked
};
-BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds), status(Status::Uninitialized) {
+BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds) : beliefManager(beliefManager), pomdpValueBounds(pomdpValueBounds), status(Status::Uninitialized) {
// Intentionally left empty
}
BeliefMdpExplorer(BeliefMdpExplorer&& other) = default;
@@ -544,16 +545,24 @@ namespace storm {
return upperValueBounds[getCurrentMdpState()];
}
/// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for minimizing properties)
/// Or that the pomdpLowerValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid.
ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const {
-return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds);
+STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(), "Requested lower value bounds but none were available.");
+auto it = pomdpValueBounds.lower.begin();
+ValueType result = beliefManager->getWeightedSum(beliefId, *it);
+for (++it; it != pomdpValueBounds.lower.end(); ++it) {
+    result = std::max(result, beliefManager->getWeightedSum(beliefId, *it));
+}
+return result;
}
/// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for maximizing properties)
/// Or that the pomdpUpperValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid.
ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const {
-return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
+STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(), "Requested upper value bounds but none were available.");
+auto it = pomdpValueBounds.upper.begin();
+ValueType result = beliefManager->getWeightedSum(beliefId, *it);
+for (++it; it != pomdpValueBounds.upper.end(); ++it) {
+    result = std::min(result, beliefManager->getWeightedSum(beliefId, *it));
+}
+return result;
}
void computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) {
@@ -817,8 +826,7 @@ namespace storm {
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
// Value and scheduler related information
-std::vector<ValueType> const& pomdpLowerValueBounds;
-std::vector<ValueType> const& pomdpUpperValueBounds;
+storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> pomdpValueBounds;
std::vector<ValueType> lowerValueBounds;
std::vector<ValueType> upperValueBounds;
std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has been performed

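The belief-level queries above work the same way as the per-state ones, except that each candidate vector is first folded into the belief via a weighted sum. A hedged sketch, with weightedSum standing in for BeliefManager::getWeightedSum (a belief is a probability distribution over POMDP states):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Stand-in for BeliefManager::getWeightedSum: the expectation of the
    // per-state bounds under the belief distribution.
    double weightedSum(std::vector<double> const& belief, std::vector<double> const& bounds) {
        double sum = 0.0;
        for (std::size_t s = 0; s < belief.size(); ++s) {
            sum += belief[s] * bounds[s];
        }
        return sum;
    }

    // Mirrors computeLowerValueBoundAtBelief: the best (largest) lower bound
    // over all guessed schedulers; upper bounds take the minimum instead.
    // Assumes lower is non-empty, as the assertion in the real code enforces.
    double lowerBoundAtBelief(std::vector<double> const& belief,
                              std::vector<std::vector<double>> const& lower) {
        double result = weightedSum(belief, lower.front());
        for (auto const& vec : lower) {
            result = std::max(result, weightedSum(belief, vec));
        }
        return result;
    }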
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (19 changed lines)

@@ -87,7 +87,8 @@ namespace storm {
// Compute some initial bounds on the values for each state of the pomdp
auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
-Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]);
+uint64_t initialPomdpState = pomdp.getInitialStates().getNextSetIndex(0);
+Result result(initialPomdpValueBounds.getHighestLowerBound(initialPomdpState), initialPomdpValueBounds.getSmallestUpperBound(initialPomdpState));
STORM_PRINT_AND_LOG("Initial value bounds are [" << result.lowerBound << ", " << result.upperBound << "]" << std::endl);
boost::optional<std::string> rewardModelName;
@@ -112,9 +113,9 @@ namespace storm {
}
if (options.refine) {
-refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
+refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds, result);
} else {
-computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
+computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds, result);
}
// "clear" results in case they were actually not requested (this will make the output a bit more clear)
if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) {
@@ -187,7 +188,7 @@ namespace storm {
}
template<typename PomdpModelType, typename BeliefValueType>
-void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
+void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result) {
if (options.discretize) {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.resolutionInit);
@@ -195,7 +196,7 @@ namespace storm {
if (rewardModelName) {
manager->setRewardModel(rewardModelName);
}
-auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
+auto approx = std::make_shared<ExplorerType>(manager, pomdpValueBounds);
HeuristicParameters heuristicParameters;
heuristicParameters.gapThreshold = options.gapThresholdInit;
heuristicParameters.observationThreshold = options.obsThresholdInit; // Actually not relevant without refinement
@@ -215,7 +216,7 @@ namespace storm {
if (rewardModelName) {
manager->setRewardModel(rewardModelName);
}
-auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
+auto approx = std::make_shared<ExplorerType>(manager, pomdpValueBounds);
HeuristicParameters heuristicParameters;
heuristicParameters.gapThreshold = options.gapThresholdInit;
heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
@@ -239,7 +240,7 @@ namespace storm {
}
template<typename PomdpModelType, typename BeliefValueType>
-void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
+void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result) {
statistics.refinementSteps = 0;
// Set up exploration data
@@ -253,7 +254,7 @@ namespace storm {
if (rewardModelName) {
overApproxBeliefManager->setRewardModel(rewardModelName);
}
-overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, pomdpValueBounds);
overApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
overApproxHeuristicPar.observationThreshold = options.obsThresholdInit;
overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit;
@@ -277,7 +278,7 @@ namespace storm {
if (rewardModelName) {
underApproxBeliefManager->setRewardModel(rewardModelName);
}
-underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, pomdpValueBounds);
underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;

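At the call site above, the initial result interval now combines all guesses at the initial state via getHighestLowerBound and getSmallestUpperBound. An illustrative, self-contained example of the effect (made-up numbers, not Storm output):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
        // Two guessed bound vectors per direction for a two-state POMDP.
        std::vector<std::vector<double>> lower = {{0.10, 0.00}, {0.30, 0.20}};
        std::vector<std::vector<double>> upper = {{0.90, 1.00}, {0.70, 0.80}};
        std::uint64_t initialState = 0;
        double lo = lower.front()[initialState];
        double hi = upper.front()[initialState];
        for (auto const& v : lower) { lo = std::max(lo, v[initialState]); }
        for (auto const& v : upper) { hi = std::min(hi, v[initialState]); }
        // Prints [0.3, 0.7]: tighter than any single guess alone.
        std::cout << "Initial value bounds are [" << lo << ", " << hi << "]" << std::endl;
        return 0;
    }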
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (7 changed lines)

@@ -16,6 +16,9 @@ namespace storm {
namespace pomdp {
namespace modelchecker {
+template<typename ValueType>
+struct TrivialPomdpValueBounds;
template<typename PomdpModelType, typename BeliefValueType = typename PomdpModelType::ValueType>
class ApproximatePOMDPModelchecker {
public:
@@ -53,7 +56,7 @@ namespace storm {
* @param maxUaModelSize the maximum size of the underapproximation model to be generated
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/
-void computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result);
+void computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result);
/**
@@ -63,7 +66,7 @@ namespace storm {
* @param min true if minimum probability is to be computed
* @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/
-void refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result);
+void refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result);
struct HeuristicParameters {
ValueType gapThreshold;

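The header gets by with a forward declaration of TrivialPomdpValueBounds because the struct only appears by const reference in the signatures; the full definition is needed only in the .cpp file. A minimal sketch of the pattern:

    // Forward declaration: enough for declaring functions that take the type
    // by (const) reference, and it avoids a heavier #include in the header.
    namespace storm { namespace pomdp { namespace modelchecker {
        template<typename ValueType>
        struct TrivialPomdpValueBounds;
    }}}

    // This declaration compiles without the full definition of the struct.
    void useBounds(storm::pomdp::modelchecker::TrivialPomdpValueBounds<double> const& bounds);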
src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h (202 changed lines)

@@ -15,26 +15,112 @@
namespace storm {
namespace pomdp {
namespace modelchecker {
+template<typename ValueType>
+struct TrivialPomdpValueBounds {
+    std::vector<std::vector<ValueType>> lower;
+    std::vector<std::vector<ValueType>> upper;
+    ValueType getHighestLowerBound(uint64_t const& state) {
+        STORM_LOG_ASSERT(!lower.empty(), "requested a lower bound but none were available");
+        auto it = lower.begin();
+        ValueType result = (*it)[state];
+        for (++it; it != lower.end(); ++it) {
+            result = std::max(result, (*it)[state]);
+        }
+        return result;
+    }
+    ValueType getSmallestUpperBound(uint64_t const& state) {
+        STORM_LOG_ASSERT(!upper.empty(), "requested an upper bound but none were available");
+        auto it = upper.begin();
+        ValueType result = (*it)[state];
+        for (++it; it != upper.end(); ++it) {
+            result = std::min(result, (*it)[state]);
+        }
+        return result;
+    }
+};
template <typename PomdpType>
class TrivialPomdpValueBoundsModelChecker {
public:
typedef typename PomdpType::ValueType ValueType;
+typedef TrivialPomdpValueBounds<ValueType> ValueBounds;
TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) {
// Intentionally left empty
}
-struct ValueBounds {
-std::vector<ValueType> lower;
-std::vector<ValueType> upper;
-};
ValueBounds getValueBounds(storm::logic::Formula const& formula) {
return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula));
}
+std::vector<ValueType> getChoiceValues(std::vector<ValueType> const& stateValues, std::vector<ValueType>* actionBasedRewards) {
+    std::vector<ValueType> choiceValues(pomdp.getNumberOfChoices());
+    pomdp.getTransitionMatrix().multiplyWithVector(stateValues, choiceValues, actionBasedRewards);
+    return choiceValues;
+}
+std::vector<ValueType> computeValuesForGuessedScheduler(std::vector<ValueType> const& stateValues, std::vector<ValueType>* actionBasedRewards, storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info, std::shared_ptr<storm::models::sparse::Mdp<ValueType>> underlyingMdp, ValueType const& scoreThreshold, bool relativeScore) {
+    // Create some positional scheduler for the POMDP
+    storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
+    // For each state, we heuristically find a good distribution over output actions.
+    auto choiceValues = getChoiceValues(stateValues, actionBasedRewards);
+    auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
+    std::vector<storm::storage::Distribution<ValueType, uint_fast64_t>> choiceDistributions(pomdp.getNrObservations());
+    for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
+        auto& choiceDistribution = choiceDistributions[pomdp.getObservation(state)];
+        ValueType const& stateValue = stateValues[state];
+        assert(stateValue >= storm::utility::zero<ValueType>());
+        std::cout << state << ": " << stateValue << "\t t=" << scoreThreshold << " rel=" << relativeScore << std::endl;
+        for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) {
+            ValueType const& choiceValue = choiceValues[choice];
+            assert(choiceValue >= storm::utility::zero<ValueType>());
+            std::cout << "\t" << (choice - choiceIndices[state]) << ": " << choiceValue << std::endl;
+            // Rate this choice by considering the relative difference between the choice value and the (optimal) state value
+            // A high score shall mean that the choice is "good"
+            if (storm::utility::isInfinity(stateValue)) {
+                // For infinity states, we simply distribute uniformly.
+                // FIXME: This case could be handled a bit more sensible
+                choiceDistribution.addProbability(choice - choiceIndices[state], scoreThreshold);
+            } else {
+                ValueType choiceScore = info.minimize() ? (choiceValue - stateValue) : (stateValue - choiceValue);
+                if (relativeScore) {
+                    ValueType avg = (stateValue + choiceValue) / storm::utility::convertNumber<ValueType, uint64_t>(2);
+                    if (!storm::utility::isZero(avg)) {
+                        choiceScore /= avg;
+                    }
+                }
+                choiceScore = storm::utility::one<ValueType>() - choiceScore;
+                if (choiceScore >= scoreThreshold) {
+                    choiceDistribution.addProbability(choice - choiceIndices[state], choiceScore);
+                }
+            }
+        }
+        STORM_LOG_ASSERT(choiceDistribution.size() > 0, "Empty choice distribution.");
+    }
+    // Normalize all distributions
+    for (auto& choiceDistribution : choiceDistributions) {
+        choiceDistribution.normalize();
+    }
+    // Set the scheduler for all states
+    for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
+        pomdpScheduler.setChoice(choiceDistributions[pomdp.getObservation(state)], state);
+    }
+    STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler.");
+    auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false);
+    auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
+    STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained.");
+    STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
+    std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
+    return pomdpSchedulerResult;
+}
ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) {
STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis.");
// Compute the values on the fully observable MDP
-// We need an actual MDP here so that the apply scheduler method below will work.
+// We need an actual MDP so that we can apply schedulers below.
// Also, the api call in the next line will require a copy anyway.
auto underlyingMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels());
auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(underlyingMdp, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
@@ -42,70 +128,76 @@ namespace storm {
STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
std::vector<ValueType> fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
-// Create some positional scheduler for the POMDP
-storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
-// For each state, we heuristically find a good distribution over output actions.
-std::vector<ValueType> fullyObservableChoiceValues(pomdp.getNumberOfChoices());
+std::vector<ValueType> actionBasedRewards;
+std::vector<ValueType>* actionBasedRewardsPtr = nullptr;
if (info.isNonNestedExpectedRewardFormula()) {
-std::vector<ValueType> actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix());
-pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues, &actionBasedRewards);
-} else {
-pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues);
+actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix());
+actionBasedRewardsPtr = &actionBasedRewards;
}
-auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
-for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) {
-auto obsStates = pomdp.getStatesWithObservation(obs);
-storm::storage::Distribution<ValueType, uint_fast64_t> choiceDistribution;
-for (auto const &state : obsStates) {
-ValueType const& stateValue = fullyObservableResult[state];
-assert(stateValue >= storm::utility::zero<ValueType>());
-for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) {
-ValueType const& choiceValue = fullyObservableChoiceValues[choice];
-assert(choiceValue >= storm::utility::zero<ValueType>());
-// Rate this choice by considering the relative difference between the choice value and the (optimal) state value
-ValueType choiceRating;
-if (stateValue < choiceValue) {
-choiceRating = choiceValue - stateValue;
-if (!storm::utility::isZero(choiceValue)) {
-choiceRating /= choiceValue;
-}
+std::vector<std::vector<ValueType>> guessedSchedulerValues;
+std::vector<std::pair<double, bool>> guessParameters({{0.875,false},{0.875,true},{0.75,false},{0.75,true}});
+for (auto const& pars : guessParameters) {
+guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(fullyObservableResult, actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(pars.first), pars.second));
+}
+// compute the 'best' guess and do a few iterations on it
+uint64_t bestGuess = 0;
+ValueType bestGuessSum = std::accumulate(guessedSchedulerValues.front().begin(), guessedSchedulerValues.front().end(), storm::utility::zero<ValueType>());
+for (uint64_t guess = 1; guess < guessedSchedulerValues.size(); ++guess) {
+ValueType guessSum = std::accumulate(guessedSchedulerValues[guess].begin(), guessedSchedulerValues[guess].end(), storm::utility::zero<ValueType>());
+if ((info.minimize() && guessSum < bestGuessSum) || (info.maximize() && guessSum > bestGuessSum)) {
+bestGuess = guess;
+bestGuessSum = guessSum;
+}
+}
+guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues[bestGuess], actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first), guessParameters[bestGuess].second));
+guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues.back(), actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first), guessParameters[bestGuess].second));
+guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues.back(), actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first), guessParameters[bestGuess].second));
+// Check if one of the guesses is worse than one of the others (and potentially delete it)
+// Avoid deleting entries during the loop to ensure that indices remain valid
+storm::storage::BitVector keptGuesses(guessedSchedulerValues.size(), true);
+for (uint64_t i = 0; i < guessedSchedulerValues.size() - 1; ++i) {
+if (!keptGuesses.get(i)) {
+continue;
+}
+for (uint64_t j = i + 1; j < guessedSchedulerValues.size(); ++j) {
+if (!keptGuesses.get(j)) {
+continue;
+}
+if (storm::utility::vector::compareElementWise(guessedSchedulerValues[i], guessedSchedulerValues[j], std::less_equal<ValueType>())) {
+if (info.minimize()) {
+// In this case we are guessing upper bounds (and smaller upper bounds are better)
+keptGuesses.set(j, false);
} else {
-choiceRating = stateValue - choiceValue;
-if (!storm::utility::isZero(stateValue)) {
-choiceRating /= stateValue;
-}
+// In this case we are guessing lower bounds (and larger lower bounds are better)
+keptGuesses.set(i, false);
+break;
}
-assert(choiceRating <= storm::utility::one<ValueType>());
-assert(choiceRating >= storm::utility::zero<ValueType>());
-// choiceRating = 0 is a very good choice, choiceRating = 1 is a very bad choice
-if (choiceRating <= storm::utility::convertNumber<ValueType>(0.5)) {
-choiceDistribution.addProbability(choice - choiceIndices[state], storm::utility::one<ValueType>() - choiceRating);
+} else if (storm::utility::vector::compareElementWise(guessedSchedulerValues[j], guessedSchedulerValues[i], std::less_equal<ValueType>())) {
+if (info.minimize()) {
+keptGuesses.set(i, false);
+break;
+} else {
+keptGuesses.set(j, false);
+}
}
}
-choiceDistribution.normalize();
-for (auto const& state : obsStates) {
-pomdpScheduler.setChoice(choiceDistribution, state);
-}
}
-STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler.");
-auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false);
-auto resultPtr2 = storm::api::verifyWithSparseEngine<ValueType>(scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
-STORM_LOG_THROW(resultPtr2, storm::exceptions::UnexpectedException, "No check result obtained.");
-STORM_LOG_THROW(resultPtr2->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
-std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr2->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
std::cout << "Keeping scheduler guesses " << keptGuesses << std::endl;
storm::utility::vector::filterVectorInPlace(guessedSchedulerValues, keptGuesses);
// Finally prepare the result
ValueBounds result;
if (info.minimize()) {
-result.lower = std::move(fullyObservableResult);
-result.upper = std::move(pomdpSchedulerResult);
+result.lower.push_back(std::move(fullyObservableResult));
+result.upper = std::move(guessedSchedulerValues);
} else {
-result.lower = std::move(pomdpSchedulerResult);
-result.upper = std::move(fullyObservableResult);
+result.lower = std::move(guessedSchedulerValues);
+result.upper.push_back(std::move(fullyObservableResult));
}
-STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower, result.upper, std::less_equal<ValueType>()), "Lower bound is larger than upper bound");
+STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower.front(), result.upper.front(), std::less_equal<ValueType>()), "Lower bound is larger than upper bound");
return result;
}
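The heart of the guessing heuristic is the per-choice score computed in computeValuesForGuessedScheduler. A condensed sketch of that rating logic (simplified to double, function name illustrative):

    // Rate a choice against the optimal state value of the fully observable MDP:
    // a score of 1 means the choice is optimal, smaller scores mean worse choices.
    // With relativeScore, the gap is normalized by the average of the two values.
    double scoreChoice(double stateValue, double choiceValue, bool minimize, bool relativeScore) {
        // For minimizing properties the choice value is at least the state value,
        // for maximizing ones at most, so the gap below is always nonnegative.
        double gap = minimize ? (choiceValue - stateValue) : (stateValue - choiceValue);
        if (relativeScore) {
            double avg = (stateValue + choiceValue) / 2.0;
            if (avg != 0.0) {
                gap /= avg;
            }
        }
        return 1.0 - gap;
    }

    // A choice enters the guessed (randomized, observation-based) scheduler with
    // weight scoreChoice(...) iff the score reaches the threshold, e.g. 0.875 or
    // 0.75 as in guessParameters; weights are then normalized per observation.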