From e1bd87b91a561e3ddebbe880aeeabdce4d1e8326 Mon Sep 17 00:00:00 2001
From: Alexander Bork <alexander.bork@rwth-aachen.de>
Date: Fri, 28 Feb 2020 12:45:34 +0100
Subject: [PATCH] Added documentation

---
 .../ApproximatePOMDPModelchecker.h            | 213 +++++++++++++-----
 1 file changed, 159 insertions(+), 54 deletions(-)
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index eb3247de1..66e5d563d 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -47,24 +47,76 @@ namespace storm {
             public:
                 explicit ApproximatePOMDPModelchecker();
 
+                /**
+                 * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop
+                 *
+                 * @param pomdp the POMDP to be checked
+                 * @param targetObservations the set of observations to be reached
+                 * @param min true if minimum probability is to be computed
+                 * @param gridResolution the initial grid resolution
+                 * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state
+                 * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
+                 */
                 std::unique_ptr<POMDPCheckResult<ValueType>>
                 refineReachabilityProbability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, std::set<uint32_t> const &targetObservations, bool min,
                                               uint64_t gridResolution, double explorationThreshold);
 
+                /**
+                 * Compute the reachability probability of given target observations on a POMDP for the given resolution only.
+                 * On-the-fly state space generation is used for the overapproximation
+                 *
+                 * @param pomdp the POMDP to be checked
+                 * @param targetObservations the set of observations to be reached
+                 * @param min true if minimum probability is to be computed
+                 * @param gridResolution the grid resolution
+                 * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state
+                 * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
+                 */
                 std::unique_ptr<POMDPCheckResult<ValueType>>
                 computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                   std::set<uint32_t> const &targetObservations, bool min,
                                                   uint64_t gridResolution, double explorationThreshold);
 
+                /**
+                 * Compute the reachability rewards for given target observations on a POMDP for the given resolution only.
+                 * On-the-fly state space generation is used for the overapproximation
+                 *
+                 * @param pomdp the POMDP to be checked
+                 * @param targetObservations the set of observations to be reached
+                 * @param min true if minimum rewards are to be computed
+                 * @param gridResolution the initial grid resolution
+                 * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state
+                 * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
+                 */
                 std::unique_ptr<POMDPCheckResult<ValueType>>
                 computeReachabilityRewardOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, std::set<uint32_t> const &targetObservations, bool min,
                                              uint64_t gridResolution);
 
+                /**
+                 * Compute the reachability probability for given target observations on a POMDP for the given resolution only.
+                 * Static state space generation is used for the overapproximation, i.e. the whole grid is generated
+                 *
+                 * @param pomdp the POMDP to be checked
+                 * @param targetObservations the set of observations to be reached
+                 * @param min true if the minimum probability is to be computed
+                 * @param gridResolution the initial grid resolution
+                 * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
+                 */
                 std::unique_ptr<POMDPCheckResult<ValueType>>
                 computeReachabilityProbability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                std::set<uint32_t> const &targetObservations, bool min,
                                                uint64_t gridResolution);
 
+                /**
+                 * Compute the reachability rewards for given target observations on a POMDP for the given resolution only.
+                 * Static state space generation is used for the overapproximation, i.e. the whole grid is generated
+                 *
+                 * @param pomdp the POMDP to be checked
+                 * @param targetObservations the set of observations to be reached
+                 * @param min true if the minimum rewards are to be computed
+                 * @param gridResolution the initial grid resolution
+                 * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
+                 */
                 std::unique_ptr<POMDPCheckResult<ValueType>>
                 computeReachabilityReward(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                           std::set<uint32_t> const &targetObservations, bool min,
@@ -72,16 +124,18 @@ namespace storm {
 
             private:
                 /**
+                 * Helper method to compute the inital step of the refinement loop
                  *
-                 * @param pomdp
-                 * @param targetObservations
-                 * @param min
-                 * @param observationResolutionVector
-                 * @param computeRewards
-                 * @param explorationThreshold
-                 * @param overApproximationMap
-                 * @param underApproximationMap
-                 * @return
+                 * @param pomdp the pomdp to be checked
+                 * @param targetObservations set of target observations
+                 * @param min true if minimum value is to be computed
+                 * @param observationResolutionVector vector containing the resolution to be used for each observation
+                 * @param computeRewards true if rewards are to be computed, false if probability is computed
+                 * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state
+                 * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
+                 * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
+                 * @param maxUaModelSize the maximum size of the underapproximation model to be generated
+                 * @return struct containing components generated during the computation to be used in later refinement iterations
                  */
                 std::unique_ptr<RefinementComponents<ValueType>>
                 computeFirstRefinementStep(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -90,13 +144,18 @@ namespace storm {
                                            boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
 
                 /**
+                 * Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size
                  *
-                 * @param pomdp
-                 * @param targetObservations
-                 * @param min
-                 * @param gridResolution
-                 * @param computeRewards
-                 * @return
+                 * @param pomdp the pomdp to be checked
+                 * @param targetObservations set of target observations
+                 * @param min true if minimum value is to be computed
+                 * @param observationResolutionVector vector containing the resolution to be used for each observation
+                 * @param computeRewards true if rewards are to be computed, false if probability is computed
+                 * @param explorationThreshold the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state
+                 * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
+                 * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
+                 * @param maxUaModelSize the maximum size of the underapproximation model to be generated
+                 * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
                  */
                 std::unique_ptr<POMDPCheckResult<ValueType>>
                 computeReachabilityOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -106,13 +165,14 @@ namespace storm {
                                        boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
 
                 /**
+                 * Helper method to compute reachability properties using static state space generation
                  *
-                 * @param pomdp
-                 * @param targetObservations
-                 * @param min
-                 * @param gridResolution
-                 * @param computeRewards
-                 * @return
+                 * @param pomdp the POMDP to be checked
+                 * @param targetObservations set of target observations
+                 * @param min true if minimum value is to be computed
+                 * @param gridResolution the resolution of the grid to be used
+                 * @param computeRewards true if rewards are to be computed, false if probability is computed
+                 * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
                  */
                 std::unique_ptr<POMDPCheckResult<ValueType>>
                 computeReachability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -166,16 +226,18 @@ namespace storm {
                                                         bool min);
 
                 /**
-                 * TODO
-                 * @param pomdp
-                 * @param beliefList
-                 * @param beliefIsTarget
-                 * @param targetObservations
-                 * @param initialBeliefId
-                 * @param min
-                 * @param computeReward
-                 * @param maxModelSize
-                 * @return
+                 * Helper to compute an underapproximation of the reachability property.
+                 * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states.
+                 *
+                 * @param pomdp the POMDP to be checked
+                 * @param beliefList vector containing already generated beliefs
+                 * @param beliefIsTarget vector containinf for each belief in beliefList true if the belief is a target
+                 * @param targetObservations set of target observations
+                 * @param initialBeliefId Id of the belief corresponding to the POMDP's initial state
+                 * @param min true if minimum value is to be computed
+                 * @param computeReward true if rewards are to be computed
+                 * @param maxModelSize number of states up until which the belief support should be unrolled
+                 * @return struct containing the components generated during the under approximation
                  */
                 std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                                                                              std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
@@ -185,31 +247,38 @@ namespace storm {
                                                                                                              uint64_t maxModelSize);
 
                 /**
+                 * Constructs the initial belief for the given POMDP
                  *
-                 * @param pomdp
-                 * @param id
-                 * @return
+                 * @param pomdp the POMDP
+                 * @param id the id the initial belief is given
+                 * @return a belief representing the initial belief
                  */
                 storm::pomdp::Belief<ValueType>
                 getInitialBelief(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t id);
 
 
                 /**
+                 * Subroutine to compute the subsimplex a given belief is contained in and the corresponding lambda values necessary for the Freudenthal triangulation
                  *
-                 * @param probabilities
-                 * @param gridResolution
-                 * @return
+                 * @param probabilities the probability distribution of the belief
+                 * @param gridResolution the resolution used for the belief
+                 * @param nrStates number of states in the POMDP
+                 * @return a pair containing: 1) the subsimplices 2) the lambda values
                  */
                 std::pair<std::vector<std::map<uint64_t, ValueType>>, std::vector<ValueType>>
                 computeSubSimplexAndLambdas(std::map<uint64_t, ValueType> &probabilities, uint64_t gridResolution, uint64_t nrStates);
 
 
                 /**
-                 *  Helper method to construct the grid of Belief states to approximate the POMDP
-                 *
-                 * @param pomdp
-                 * @param gridResolution
+                 * Helper method to construct the static belief grid for the POMDP overapproximation
                  *
+                 * @param pomdp the POMDP to be approximated
+                 * @param target_observations set of target observations
+                 * @param gridResolution the resolution of the grid to be constructed
+                 * @param beliefList data structure to store all generated beliefs
+                 * @param grid data structure to store references to the grid beliefs specifically
+                 * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief
+                 * @param nextId the ID to be used for the next generated belief
                  */
                 void constructBeliefGrid(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                          std::set<uint32_t> const &target_observations, uint64_t gridResolution,
@@ -219,12 +288,12 @@ namespace storm {
 
 
                 /**
-                 * Helper method to get the probabilities of each observation after performing an action
+                 * Helper method to get the probabilities to be in a state with each observation after performing an action
                  *
-                 * @param pomdp
-                 * @param belief
-                 * @param actionIndex
-                 * @return
+                 * @param pomdp the POMDP
+                 * @param belief the belief in which the action is performed
+                 * @param actionIndex the index of the action to be performed
+                 * @return mapping from each observation to the probability to be in a state with that observation after performing the action
                  */
                 std::map<uint32_t, ValueType> computeObservationProbabilitiesAfterAction(
                         storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -236,6 +305,9 @@ namespace storm {
                  * If the belief does not exist yet, it is created and added to the list of all beliefs
                  *
                  * @param pomdp the POMDP on which the evaluation should be performed
+                 * @param beliefList data structure to store all generated beliefs
+                 * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief
+                 * @param targetObservations set of target observations
                  * @param belief the starting belief
                  * @param actionIndex the index of the action to be performed
                  * @param observation the observation after the action was performed
@@ -250,12 +322,13 @@ namespace storm {
                         uint64_t actionIndex, uint32_t observation, uint64_t id);
 
                 /**
-                 * Helper method to get the next belief that results from a belief by performing an action
+                 * Helper method to generate the next belief that results from a belief by performing an action
                  *
-                 * @param pomdp
-                 * @param belief
-                 * @param actionIndex
-                 * @return
+                 * @param pomdp the POMDP
+                 * @param belief the starting belief
+                 * @param actionIndex the index of the action to be performed
+                 * @param id the ID for the generated belief
+                 * @return a belief object representing the belief after performing the action in the starting belief
                  */
                 storm::pomdp::Belief<ValueType>
                 getBeliefAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, storm::pomdp::Belief<ValueType> &belief, uint64_t actionIndex,
@@ -264,21 +337,53 @@ namespace storm {
                 /**
                  * Helper to get the id of a Belief stored in a given vector structure
                  *
-                 * @param observation
-                 * @param probabilities
-                 * @return
+                 * @param grid the vector on which the lookup is performed
+                 * @param observation the observation of the belief
+                 * @param probabilities the probability distribution over the POMDP states of the Belief
+                 * @return if the belief was found in the vector, the belief's ID, otherwise -1
                  */
                 uint64_t getBeliefIdInVector(std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
                                              std::map<uint64_t, ValueType> &probabilities);
 
                 /**
+                 * Helper method to build the transition matrix from a data structure containing transations
+                 *
                  * @param transitions  data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability)
                  * @return sparseMatrix representing the transitions
                  */
                 storm::storage::SparseMatrix<ValueType> buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> &transitions);
 
+                /**
+                 * Get the reward for performing an action in a given belief
+                 *
+                 * @param pomdp the POMDP
+                 * @param action the index of the action to be performed
+                 * @param belief the belief in which the action is performed
+                 * @return the reward earned by performing the action in the belief
+                 */
                 ValueType getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t action, storm::pomdp::Belief<ValueType> &belief);
 
+
+                /**
+                 * Helper method for value iteration on data structures representing the belief grid
+                 * This is very close to the method implemented in PRISM POMDP
+                 *
+                 * @param pomdp The POMDP
+                 * @param beliefList data structure to store all generated beliefs
+                 * @param beliefGrid data structure to store references to the grid beliefs specifically
+                 * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief
+                 * @param observationProbabilities data structure containing for each belief and possible action the probability to go to a state with a given observation
+                 * @param nextBelieves data structure containing for each belief the successor belief after performing an action and observing a given observation
+                 * @param beliefActionRewards data structure containing for each belief and possible action the reward for performing the action
+                 * @param subSimplexCache caching data structure to store already computed subsimplices
+                 * @param lambdaCache caching data structure to store already computed lambda values
+                 * @param result data structure to store result values for each grid state
+                 * @param chosenActions data structure to store the action(s) that lead to the computed result value
+                 * @param gridResolution the resolution of the grid
+                 * @param min true if minimal values are to be computed
+                 * @param computeRewards true if rewards are to be computed
+                 * @return the resulting probability/reward in the initial state
+                 */
                 ValueType
                 overApproximationValueIteration(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
                                                 std::vector<storm::pomdp::Belief<ValueType>> &beliefGrid, std::vector<bool> &beliefIsTarget,