
BeliefMdpExplorer: Implemented extraction of optimal scheduler choices and reachable states under these choices

tempestpy_adaptions
Tim Quatmann 5 years ago
commit 45832d3de3
1 changed file:

src/storm-pomdp/builder/BeliefMdpExplorer.h (77)

@@ -61,6 +61,8 @@ namespace storm {
exploredMdpTransitions.clear();
exploredChoiceIndices.clear();
mdpActionRewards.clear();
optimalChoices = boost::none;
optimalChoicesReachableMdpStates = boost::none;
exploredMdp = nullptr;
internalAddRowGroupIndex(); // Mark the start of the first row group
@@ -230,6 +232,31 @@ namespace storm {
return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates();
}
/*!
* Retrieves whether the current state can be reached under a scheduler that was optimal in the most recent check.
* This requires (i) a previous call to computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior.
*/
bool currentStateIsOptimalSchedulerReachable() {
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateIsOptimalSchedulerReachable' called but there is no current state.");
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'currentStateIsOptimalSchedulerReachable' called but current state has no old behavior");
STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
return optimalChoicesReachableMdpStates->get(getCurrentMdpState());
}
/*!
* Retrieves whether the given action at the current state was optimal in the most recent check.
* This requires (i) a previous call to computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior.
*/
bool actionAtCurrentStateWasOptimal(uint64_t const& localActionIndex) {
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state.");
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior");
STORM_LOG_ASSERT(optimalChoices.is_initialized(), "Method 'actionAtCurrentStateWasOptimal' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
return optimalChoices->get(row);
}
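
For illustration only, a minimal sketch of how the two predicates above might be consulted in a later exploration round. Only the explorer member functions shown in this diff (plus currentStateHasOldBehavior) are taken from the header; the free function, its parameters, and the refinement strategy it encodes are hypothetical.

#include <cstdint>
#include <vector>

// Hypothetical sketch (not part of this commit): collect the actions of the
// current state that a refinement step might re-expand, based on the queries
// added in this diff.
template<typename Explorer>
std::vector<uint64_t> actionsToReexpand(Explorer& explorer, uint64_t numberOfLocalActions) {
    std::vector<uint64_t> result;
    if (!explorer.currentStateHasOldBehavior()) {
        // Freshly discovered state: all actions need to be expanded.
        for (uint64_t action = 0; action < numberOfLocalActions; ++action) {
            result.push_back(action);
        }
    } else if (explorer.currentStateIsOptimalSchedulerReachable()) {
        // Known state on an optimal path: only refine the actions that an
        // optimal scheduler of the last check might take.
        for (uint64_t action = 0; action < numberOfLocalActions; ++action) {
            if (explorer.actionAtCurrentStateWasOptimal(action)) {
                result.push_back(action);
            }
        }
    }
    // Known state that no optimal scheduler reaches: nothing to re-expand.
    return result;
}
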
/*!
* Inserts transitions and rewards at the given action as in the MDP of the previous exploration.
* Does NOT set whether the state is truncated and/or target.
@@ -285,6 +312,10 @@ namespace storm {
dropUnexploredStates();
}
// The potentially computed optimal choices and the set of states that are reachable under these choices are not valid anymore.
optimalChoices = boost::none;
optimalChoicesReachableMdpStates = boost::none;
// Create the transition matrix
uint64_t entryCount = 0;
for (auto const& row : exploredMdpTransitions) {
@@ -558,6 +589,48 @@ namespace storm {
lowerValueBounds = values;
}
/*!
* Computes the set of states that are reachable via a path that is consistent with an optimal MDP scheduler.
* States that are only reachable via target states will not be in this set.
* @param ancillaryChoicesEpsilon if the 1-step value of a choice is within this epsilon of the optimal value, the choice is also considered optimal (ancillary).
* @param relativeDifference if set, the difference is measured relative to the optimal value when detecting ancillary choices.
*/
void computeOptimalChoicesAndReachableMdpStates(ValueType const& ancillaryChoicesEpsilon, bool relativeDifference) {
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP is available.");
STORM_LOG_ASSERT(!optimalChoices.is_initialized(), "Tried to compute optimal scheduler but this has already been done before.");
STORM_LOG_ASSERT(!optimalChoicesReachableMdpStates.is_initialized(), "Tried to compute states that are reachable under an optimal scheduler but this has already been done before.");
// First find the choices that are optimal
optimalChoices = storm::storage::BitVector(exploredMdp->getNumberOfChoices(), false);
auto const& choiceIndices = exploredMdp->getNondeterministicChoiceIndices();
auto const& transitions = exploredMdp->getTransitionMatrix();
auto const& targetStates = exploredMdp->getStates("target");
for (uint64_t mdpState = 0; mdpState < exploredMdp->getNumberOfStates(); ++mdpState) {
if (targetStates.get(mdpState)) {
// Target states can be skipped.
continue;
} else {
auto const& stateValue = values[mdpState];
for (uint64_t globalChoice = choiceIndices[mdpState]; globalChoice < choiceIndices[mdpState + 1]; ++globalChoice) {
ValueType choiceValue = transitions.multiplyRowWithVector(globalChoice, values);
if (exploredMdp->hasRewardModel()) {
choiceValue += exploredMdp->getUniqueRewardModel().getStateActionReward(globalChoice);
}
ValueType absDiff = storm::utility::abs<ValueType>((choiceValue - stateValue));
if ((relativeDifference && absDiff <= ancillaryChoicesEpsilon * stateValue) || (!relativeDifference && absDiff <= ancillaryChoicesEpsilon)) {
optimalChoices->set(globalChoice, true);
}
}
STORM_LOG_ASSERT(optimalChoices->getNextSetIndex(choiceIndices[mdpState]) < optimalChoices->size(), "Could not find an optimal choice.");
}
}
// Then, find the states that are reachable via these choices
optimalChoicesReachableMdpStates = storm::utility::graph::getReachableStates(transitions, exploredMdp->getInitialStates(), ~targetStates, targetStates, false, 0, optimalChoices.get());
}
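
For reference, the tolerance test applied in the loop above, isolated into a standalone sketch; the function name and the use of double in place of ValueType are assumptions made for illustration.

#include <cmath>

// Sketch of the near-optimality criterion used above: a choice counts as
// (ancillary) optimal if its 1-step value is within epsilon of the state's
// optimal value, either absolutely or relative to that value.
inline bool isChoiceNearOptimal(double choiceValue, double stateValue, double epsilon, bool relativeDifference) {
    double absDiff = std::abs(choiceValue - stateValue);
    return relativeDifference ? (absDiff <= epsilon * stateValue) : (absDiff <= epsilon);
}

// Example: stateValue = 0.8, choiceValue = 0.7996, epsilon = 1e-3
//   absolute:  |0.7996 - 0.8| = 4e-4 <= 1e-3        -> choice is kept
//   relative:  4e-4 <= 1e-3 * 0.8 = 8e-4            -> choice is kept
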
private:
MdpStateType noState() const {
return std::numeric_limits<MdpStateType>::max();
@@ -672,12 +745,14 @@ namespace storm {
// Final Mdp
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
// Value related information
// Value and scheduler related information
std::vector<ValueType> const& pomdpLowerValueBounds;
std::vector<ValueType> const& pomdpUpperValueBounds;
std::vector<ValueType> lowerValueBounds;
std::vector<ValueType> upperValueBounds;
std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has been performed
boost::optional<storm::storage::BitVector> optimalChoices;
boost::optional<storm::storage::BitVector> optimalChoicesReachableMdpStates;
// The current status of this explorer
Status status;
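
Taken together, the additions suggest a check-then-refine flow. Below is a hedged sketch of how a driver might chain them, assuming it already owns an explorer instance; only computeOptimalChoicesAndReachableMdpStates and the query methods are taken from this diff, while restartExploration and the loop structure are assumptions.

// Hypothetical driver sketch (not part of this commit): after checking the
// explored MDP, extract the optimal choices and the states reachable under
// them, then use that information to prune the next exploration round.
template<typename Explorer, typename ValueType>
void refineOnce(Explorer& explorer, ValueType const& epsilon) {
    // Requires Status::ModelChecked, i.e. a check of the explored MDP was run.
    explorer.computeOptimalChoicesAndReachableMdpStates(epsilon, /*relativeDifference=*/true);

    // Re-enter exploration (method name assumed); states with old behavior can
    // now be filtered via currentStateIsOptimalSchedulerReachable() and
    // actionAtCurrentStateWasOptimal(localActionIndex).
    explorer.restartExploration();
    // ... exploration loop of the surrounding belief MDP builder ...
}
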
