From 45832d3de37e5283003a3bfd5114b59c60697140 Mon Sep 17 00:00:00 2001
From: Tim Quatmann
Date: Wed, 22 Apr 2020 12:12:05 +0200
Subject: [PATCH] BeliefMdpExplorer: Implemented extraction of optimal
 scheduler choices and reachable states under these choices

---
 src/storm-pomdp/builder/BeliefMdpExplorer.h | 77 ++++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h
index 36f525663..2e1b0f4cc 100644
--- a/src/storm-pomdp/builder/BeliefMdpExplorer.h
+++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h
@@ -61,6 +61,8 @@ namespace storm {
             exploredMdpTransitions.clear();
             exploredChoiceIndices.clear();
             mdpActionRewards.clear();
+            optimalChoices = boost::none;
+            optimalChoicesReachableMdpStates = boost::none;
             exploredMdp = nullptr;
             internalAddRowGroupIndex(); // Mark the start of the first row group
@@ -230,6 +232,31 @@ namespace storm {
                 return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates();
             }
 
+            /*!
+             * Retrieves whether the current state can be reached under a scheduler that was optimal in the most recent check.
+             * This requires (i) a previous call of computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior.
+             */
+            bool currentStateIsOptimalSchedulerReachable() {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateIsOptimalSchedulerReachable' called but there is no current state.");
+                STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'currentStateIsOptimalSchedulerReachable' called but the current state has no old behavior.");
+                STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
+                return optimalChoicesReachableMdpStates->get(getCurrentMdpState());
+            }
+
+            /*!
+             * Retrieves whether the given action at the current state was optimal in the most recent check.
+             * This requires (i) a previous call of computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior.
+             */
+            bool actionAtCurrentStateWasOptimal(uint64_t const& localActionIndex) {
+                STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
+                STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state.");
+                STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but the current state has no old behavior.");
+                STORM_LOG_ASSERT(optimalChoices.is_initialized(), "Method 'actionAtCurrentStateWasOptimal' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
+                uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
+                return optimalChoices->get(row);
+            }
+
             /*!
              * Inserts transitions and rewards at the given action as in the MDP of the previous exploration.
              * Does NOT set whether the state is truncated and/or target.
@@ -285,6 +312,10 @@ namespace storm {
                     dropUnexploredStates();
                 }
 
+                // The previously computed optimal choices and the set of states reachable under these choices are no longer valid.
+                optimalChoices = boost::none;
+                optimalChoicesReachableMdpStates = boost::none;
+
                 // Create the transition matrix
                 uint64_t entryCount = 0;
                 for (auto const& row : exploredMdpTransitions) {
@@ -558,6 +589,48 @@ namespace storm {
                 lowerValueBounds = values;
             }
 
+            /*!
+             * Computes the set of states that are reachable via a path that is consistent with an optimal MDP scheduler.
+             * States that are only reachable via target states will not be in this set.
+             * @param ancillaryChoicesEpsilon if the 1-step value of a choice is at most epsilon away from the optimal value, the choice is considered optimal as well.
+             * @param relativeDifference if set, the relative difference to the optimal value is used to detect ancillary choices.
+             */
+            void computeOptimalChoicesAndReachableMdpStates(ValueType const& ancillaryChoicesEpsilon, bool relativeDifference) {
+                STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
+                STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP is available.");
+                STORM_LOG_ASSERT(!optimalChoices.is_initialized(), "Tried to compute the optimal scheduler choices but this has already been done before.");
+                STORM_LOG_ASSERT(!optimalChoicesReachableMdpStates.is_initialized(), "Tried to compute the states that are reachable under an optimal scheduler but this has already been done before.");
+
+                // First, find the choices that are optimal.
+                optimalChoices = storm::storage::BitVector(exploredMdp->getNumberOfChoices(), false);
+                auto const& choiceIndices = exploredMdp->getNondeterministicChoiceIndices();
+                auto const& transitions = exploredMdp->getTransitionMatrix();
+                auto const& targetStates = exploredMdp->getStates("target");
+                for (uint64_t mdpState = 0; mdpState < exploredMdp->getNumberOfStates(); ++mdpState) {
+                    if (targetStates.get(mdpState)) {
+                        // Target states can be skipped.
+                        continue;
+                    } else {
+                        auto const& stateValue = values[mdpState];
+                        for (uint64_t globalChoice = choiceIndices[mdpState]; globalChoice < choiceIndices[mdpState + 1]; ++globalChoice) {
+                            ValueType choiceValue = transitions.multiplyRowWithVector(globalChoice, values);
+                            if (exploredMdp->hasRewardModel()) {
+                                choiceValue += exploredMdp->getUniqueRewardModel().getStateActionReward(globalChoice);
+                            }
+                            ValueType absDiff = storm::utility::abs(choiceValue - stateValue);
+                            if ((relativeDifference && absDiff <= ancillaryChoicesEpsilon * stateValue) || (!relativeDifference && absDiff <= ancillaryChoicesEpsilon)) {
+                                optimalChoices->set(globalChoice, true);
+                            }
+                        }
+                        STORM_LOG_ASSERT(optimalChoices->getNextSetIndex(choiceIndices[mdpState]) < optimalChoices->size(), "Could not find an optimal choice.");
+                    }
+                }
+
+                // Then, find the states that are reachable via these choices.
+                optimalChoicesReachableMdpStates = storm::utility::graph::getReachableStates(transitions, exploredMdp->getInitialStates(), ~targetStates, targetStates, false, 0, optimalChoices.get());
+            }
+
         private:
             MdpStateType noState() const {
                 return std::numeric_limits<MdpStateType>::max();
@@ -672,12 +745,14 @@ namespace storm {
             // Final Mdp
             std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
 
-            // Value related information
+            // Value and scheduler related information
             std::vector<ValueType> const& pomdpLowerValueBounds;
             std::vector<ValueType> const& pomdpUpperValueBounds;
            std::vector<ValueType> lowerValueBounds;
             std::vector<ValueType> upperValueBounds;
             std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has been performed
+            boost::optional<storm::storage::BitVector> optimalChoices;
+            boost::optional<storm::storage::BitVector> optimalChoicesReachableMdpStates;
 
             // The current status of this explorer
             Status status;
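
Note (illustration only, not part of the patch): a minimal sketch of how the new methods are meant to be driven, following the status discipline enforced by the asserts above (computeOptimalChoicesAndReachableMdpStates is legal in Status::ModelChecked, the two query methods in Status::Exploring). The Explorer template parameter, the driver call beginNextExploration, and the helper localActionCount are hypothetical stand-ins; only the three methods added by this patch plus currentStateHasOldBehavior are taken from the header.

    #include <cstdint>

    // Sketch: restrict re-exploration to choices/states consistent with an
    // optimal scheduler of the previously checked MDP.
    template<typename Explorer>
    void reexploreAlongOptimalScheduler(Explorer& explorer, double epsilon) {
        // Phase 1 (Status::ModelChecked): mark the (near-)optimal choices and the
        // states reachable under them. With relativeDifference == false, a choice
        // is kept if its 1-step value is within an absolute `epsilon` of the
        // state's optimal value.
        explorer.computeOptimalChoicesAndReachableMdpStates(epsilon, /*relativeDifference=*/false);

        // Phase 2 (Status::Exploring): while re-exploring, states carried over
        // from the previous MDP ("old behavior") can be filtered against the
        // scheduler.
        beginNextExploration(explorer); // hypothetical driver call
        if (explorer.currentStateHasOldBehavior()) {
            if (!explorer.currentStateIsOptimalSchedulerReachable()) {
                // No optimal scheduler reaches this state; the caller may keep
                // the old behavior here instead of refining further.
            }
            for (uint64_t action = 0; action < localActionCount(explorer); ++action) { // hypothetical helper
                if (explorer.actionAtCurrentStateWasOptimal(action)) {
                    // Only choices an optimal scheduler may take need re-exploration.
                }
            }
        }
    }

With relativeDifference == true, the test in the patch becomes |Q(s,a) - V(s)| <= epsilon * V(s) instead of the absolute bound, where Q(s,a) is the 1-step value computed via multiplyRowWithVector (plus the action reward, if any) and V(s) is the checked state value.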