From f119e3d4c751f99276b301280675c7a6ecaa38a7 Mon Sep 17 00:00:00 2001
From: Alexander Bork
Date: Fri, 18 Oct 2019 11:32:36 +0200
Subject: [PATCH] Added reward over-approximation

---
 src/storm-pomdp-cli/storm-pomdp.cpp              |  55 ++++
 .../ApproximatePOMDPModelchecker.cpp             | 254 +++++++++++++++++-
 2 files changed, 308 insertions(+), 1 deletion(-)

diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp
index e6be182a3..b34ed65b7 100644
--- a/src/storm-pomdp-cli/storm-pomdp.cpp
+++ b/src/storm-pomdp-cli/storm-pomdp.cpp
@@ -183,6 +183,7 @@ int main(const int argc, const char** argv) {
             }
             STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException,
                             "The formula is not supported by the grid approximation");
+            STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!");
 
             storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double> checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double>();
             double overRes = storm::utility::one<double>();
@@ -203,6 +204,60 @@ int main(const int argc, const char** argv) {
             pomdp = selfLoopEliminator.transform();
             STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl);
         }
+        if (pomdpSettings.isGridApproximationSet()) {
+            std::string rewardModelName;
+            storm::logic::RewardOperatorFormula const &rewFormula = formula->asRewardOperatorFormula();
+            if (rewFormula.hasRewardModelName()) {
+                rewardModelName = rewFormula.getRewardModelName();
+            }
+            storm::logic::Formula const &subformula1 = rewFormula.getSubformula();
+
+            std::set<uint32_t> targetObservationSet;
+            // TODO refactor, this duplicates the target extraction used for reachability probabilities
+            bool validFormula = false;
+            if (subformula1.isEventuallyFormula()) {
+                storm::logic::EventuallyFormula const &eventuallyFormula = subformula1.asEventuallyFormula();
+                storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula();
+                if (subformula2.isAtomicLabelFormula()) {
+                    storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula();
+                    validFormula = true;
+                    std::string targetLabel = alFormula.getLabel();
+                    auto labeling = pomdp->getStateLabeling();
+                    for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) {
+                        if (labeling.getStateHasLabel(targetLabel, state)) {
+                            targetObservationSet.insert(pomdp->getObservation(state));
+                        }
+                    }
+                } else if (subformula2.isAtomicExpressionFormula()) {
+                    validFormula = true;
+                    std::stringstream stream;
+                    stream << subformula2.asAtomicExpressionFormula().getExpression();
+                    storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str());
+                    std::string targetLabel = formula3.getLabel();
+                    auto labeling = pomdp->getStateLabeling();
+                    for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) {
+                        if (labeling.getStateHasLabel(targetLabel, state)) {
+                            targetObservationSet.insert(pomdp->getObservation(state));
+                        }
+                    }
+                }
+            }
+            STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException,
+                            "The formula is not supported by the grid approximation");
+            STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!");
+
+            storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double> checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double>();
+            double overRes = storm::utility::one<double>();
+            double underRes = storm::utility::zero<double>();
+            std::unique_ptr<storm::pomdp::modelchecker::POMDPCheckResult<double>> result;
+            result = checker.computeReachabilityReward(*pomdp, targetObservationSet,
+                                                       rewFormula.getOptimalityType() == storm::OptimizationDirection::Minimize,
+                                                       pomdpSettings.getGridResolution());
+            overRes = result->OverapproximationValue;
+            underRes = result->UnderapproximationValue;
+        }
     }
     if (pomdpSettings.getMemoryBound() > 1) {
         STORM_PRINT_AND_LOG("Computing the unfolding for memory bound " << pomdpSettings.getMemoryBound() << " and memory pattern '" << storm::storage::toString(pomdpSettings.getMemoryPattern()) << "' ...");
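The block added to storm-pomdp.cpp handles reachability-reward properties such as Rmin=? [F "goal"]: every state carrying the target label (or satisfying the target expression) contributes its observation, and that observation set is handed to the checker as the goal. The short standalone sketch below restates that extraction in isolation; SimplePomdp and collectTargetObservations are hypothetical names, not Storm API.

// Standalone sketch (not Storm code): collect the observations of all states that
// carry the target label, as done for the reward formula above.
#include <cstddef>
#include <cstdint>
#include <set>
#include <string>
#include <vector>

struct SimplePomdp {
    std::vector<std::set<std::string>> stateLabels;  // labels attached to each state
    std::vector<uint32_t> observations;              // observation emitted by each state
};

std::set<uint32_t> collectTargetObservations(SimplePomdp const& pomdp, std::string const& targetLabel) {
    std::set<uint32_t> targetObservations;
    for (std::size_t state = 0; state < pomdp.observations.size(); ++state) {
        if (pomdp.stateLabels[state].count(targetLabel) > 0) {
            targetObservations.insert(pomdp.observations[state]);
        }
    }
    return targetObservations;
}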
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index e0b12229a..43d2a0225 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -179,7 +179,6 @@ namespace storm {
                         }
                     }
                     finished = !improvement;
-                    storm::utility::Stopwatch backupTimer(true);
                     // back up
                     result_backup = result;
 
@@ -231,6 +230,249 @@ namespace storm {
                        POMDPCheckResult<ValueType>{overApprox, underApprox});
            }
 
+            // TODO This function reuses a lot of code from the probability computation, refactor to minimize code duplication!
+            template<typename ValueType, typename RewardModelType>
+            std::unique_ptr<POMDPCheckResult<ValueType>>
+            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityReward(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                                                                 std::set<uint32_t> targetObservations, bool min, uint64_t gridResolution) {
+                storm::utility::Stopwatch beliefGridTimer(true);
+                uint64_t maxIterations = 100;
+                bool finished = false;
+                uint64_t iteration = 0;
+
+                RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel();
+
+                std::vector<storm::pomdp::Belief<ValueType>> beliefList;
+                std::vector<bool> beliefIsTarget;
+                uint64_t nextId = 0;
+                // Initial belief always has ID 0
+                storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(pomdp, nextId);
+                ++nextId;
+                beliefList.push_back(initialBelief);
+                beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
+
+                std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
+                constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId);
+                nextId = beliefList.size();
+                beliefGridTimer.stop();
+
+                storm::utility::Stopwatch overApproxTimer(true);
+                // Belief ID -> Value
+                std::map<uint64_t, ValueType> result;
+                std::map<uint64_t, ValueType> result_backup;
+                // Belief ID -> ActionIndex
+                std::map<uint64_t, uint64_t> chosenActions;
+
+                // Belief ID -> Action -> (Observation -> Probability)
+                std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
+                // current ID -> Action -> (Observation -> next ID)
+                std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> nextBelieves;
+                // current ID -> Action -> reward
+                std::map<uint64_t, std::vector<ValueType>> beliefActionRewards;
+
+                storm::utility::Stopwatch nextBeliefGeneration(true);
+                for (size_t i = 0; i < beliefGrid.size(); ++i) {
+                    auto currentBelief = beliefGrid[i];
+                    bool isTarget = beliefIsTarget[currentBelief.id];
+                    result.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
+                    result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
+                    if (!isTarget) {
+                        // TODO put this in an extra function
+                        storm::utility::Stopwatch beliefWatch(true);
+                        // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative
+                        uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
+                        uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
+                        std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
+                        std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
+                        std::vector<ValueType> actionRewardsInState(numChoices);
+
+                        for (uint64_t action = 0; action < numChoices; ++action) {
+                            storm::utility::Stopwatch aopWatch(true);
+                            std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action);
+                            aopWatch.stop();
+                            //STORM_PRINT("AOP " << actionObservationProbabilities.size() << ": " << aopWatch << std::endl)
+                            std::map<uint32_t, uint64_t> actionObservationBelieves;
+                            for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
+                                uint32_t observation = iter->first;
+                                storm::utility::Stopwatch callWatch(true);
+                                // THIS CALL IS SLOW
+                                // TODO speed this up
+                                actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations,
+                                                                                                            currentBelief, action, observation, nextId);
+                                nextId = beliefList.size();
+                                callWatch.stop();
+                                //STORM_PRINT("Overall: " << callWatch << std::endl)
+                            }
+                            observationProbabilitiesInAction[action] = actionObservationProbabilities;
+                            nextBelievesInAction[action] = actionObservationBelieves;
+                            actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
+                                                                                currentBelief);
+                        }
+                        observationProbabilities.emplace(std::make_pair(currentBelief.id, observationProbabilitiesInAction));
+                        nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction));
+                        beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState));
+                        beliefWatch.stop();
+                        //STORM_PRINT("Belief " << currentBelief.id << " (" << isTarget << "): " << beliefWatch << std::endl)
+                    }
+                }
+                nextBeliefGeneration.stop();
+
+                // Use caching to avoid recomputing the subsimplices and lambdas
+                std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
+                std::map<uint64_t, std::vector<ValueType>> lambdaCache;
+
+                STORM_PRINT("Number of grid beliefs: " << beliefGrid.size() << std::endl)
+                STORM_PRINT("Time to generate next beliefs: " << nextBeliefGeneration << std::endl)
+                // Value iteration
+                while (!finished && iteration < maxIterations) {
+                    storm::utility::Stopwatch iterationTimer(true);
+                    STORM_LOG_DEBUG("Iteration " << iteration + 1);
+                    bool improvement = false;
+                    for (size_t i = 0; i < beliefGrid.size(); ++i) {
+                        storm::pomdp::Belief<ValueType> currentBelief = beliefGrid[i];
+                        bool isTarget = beliefIsTarget[currentBelief.id];
+                        if (!isTarget) {
+                            // we can take any state with the observation as they have the same number of choices
+                            uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front());
+                            // Initialize the values for the value iteration
+                            ValueType chosenValue = min ? storm::utility::infinity<ValueType>() : -storm::utility::infinity<ValueType>();
+                            uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::max();
+                            ValueType currentValue;
+
+                            for (uint64_t action = 0; action < numChoices; ++action) {
+                                storm::utility::Stopwatch actionWatch(true);
+
+                                currentValue = beliefActionRewards[currentBelief.id][action];
+                                storm::utility::Stopwatch loopTimer(true);
+                                for (auto iter = observationProbabilities[currentBelief.id][action].begin();
+                                     iter != observationProbabilities[currentBelief.id][action].end(); ++iter) {
+                                    storm::utility::Stopwatch subsimplexTime(true);
+                                    uint32_t observation = iter->first;
+                                    storm::pomdp::Belief<ValueType> nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]];
+                                    // compute the subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
+                                    // cache the values to avoid recomputing them
+                                    std::vector<std::vector<ValueType>> subSimplex;
+                                    std::vector<ValueType> lambdas;
+                                    if (subSimplexCache.count(nextBelief.id) > 0) {
+                                        subSimplex = subSimplexCache[nextBelief.id];
+                                        lambdas = lambdaCache[nextBelief.id];
+                                    } else {
+                                        std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution);
+                                        subSimplex = temp.first;
+                                        lambdas = temp.second;
+                                        subSimplexCache[nextBelief.id] = subSimplex;
+                                        lambdaCache[nextBelief.id] = lambdas;
+                                    }
+                                    subsimplexTime.stop();
+                                    //STORM_PRINT("--subsimplex: " << subsimplexTime.getTimeInNanoseconds() << "ns" << std::endl)
+                                    storm::utility::Stopwatch sumTime(true);
+                                    auto sum = storm::utility::zero<ValueType>();
+                                    for (size_t j = 0; j < lambdas.size(); ++j) {
+                                        if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
+                                            sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
+                                        }
+                                    }
+
+                                    currentValue += iter->second * sum;
+                                    sumTime.stop();
+                                    //STORM_PRINT("--value: " << sumTime.getTimeInNanoseconds() << "ns" << std::endl)
+                                }
+                                loopTimer.stop();
+                                //STORM_PRINT("-Loop: " << loopTimer.getTimeInNanoseconds() << "ns" << std::endl)
+                                // Update the chosen action if the current one is at least as good
+                                if ((min && cc.isLess(storm::utility::zero<ValueType>(), chosenValue - currentValue)) ||
+                                    (!min && cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
+                                    cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
+                                    chosenValue = currentValue;
+                                    chosenActionIndex = action;
+                                }
+                                actionWatch.stop();
+                                //STORM_PRINT("Action: " << actionWatch.getTimeInNanoseconds() << "ns" << std::endl)
+                            }
+
+                            result[currentBelief.id] = chosenValue;
+                            chosenActions[currentBelief.id] = chosenActionIndex;
+                            // Check if the iteration brought an improvement
+                            if (cc.isLess(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id]) ||
+                                cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
+                                improvement = true;
+                            }
+                        }
+                    }
+                    finished = !improvement;
+                    // back up
+                    result_backup = result;
+
+                    ++iteration;
+                    iterationTimer.stop();
+                    STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl);
+                }
+
+                STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl);
+
+                beliefGrid.push_back(initialBelief);
+                beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
+
+                std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution);
+                std::vector<std::vector<ValueType>> initSubSimplex = temp.first;
+                std::vector<ValueType> initLambdas = temp.second;
+
+                auto overApprox = storm::utility::zero<ValueType>();
+                for (size_t j = 0; j < initLambdas.size(); ++j) {
+                    if (initLambdas[j] != storm::utility::zero<ValueType>()) {
+                        overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])];
+                    }
+                }
+                overApproxTimer.stop();
+
+                /*
+                // Now onto the under-approximation
+                bool useMdp = false; //true;
+                storm::utility::Stopwatch underApproxTimer(true);
+                ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
+                                                                                  result, chosenActions, gridResolution, initialBelief.id, min)
+                                               : computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
+                                                                                   result, chosenActions, gridResolution, initialBelief.id, min);
+                underApproxTimer.stop();
+
+                STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl
+                            << "Time Overapproximation: " << overApproxTimer << std::endl
+                            << "Time Underapproximation: " << underApproxTimer << std::endl);
+                */
+                STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
+                //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
+
+                // The under-approximation is not computed yet, so it is reported as zero.
+                return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{overApprox, storm::utility::zero<ValueType>()});
+            }
+
         template<typename ValueType, typename RewardModelType>
         ValueType
         ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -831,6 +1073,16 @@ namespace storm {
             }
         }
 
+        template<typename ValueType, typename RewardModelType>
+        ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                                                                 uint64_t action, storm::pomdp::Belief<ValueType> belief) {
+            // Weight the reward of each state in the belief's support by its probability
+            auto result = storm::utility::zero<ValueType>();
+            for (size_t i = 0; i < belief.probabilities.size(); ++i) {
+                result += belief.probabilities[i] * pomdp.getUniqueRewardModel().getTotalStateActionReward(i, action, pomdp.getTransitionMatrix());
+            }
+            return result;
+        }
+
         template class ApproximatePOMDPModelchecker<double>;
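For orientation, the backup performed inside the value-iteration loop of computeReachabilityReward combines the expected immediate reward of a belief with the interpolated values of its successor beliefs: V(b) = opt_a [ r(b,a) + Σ_z P(z|b,a) · Σ_j λ_j · V(v_j) ], where the v_j are the grid beliefs spanning the sub-simplex of the successor belief and the λ_j are the corresponding convex weights (Lovejoy's grid approximation). The self-contained sketch below restates one such backup with plain standard-library types; it is illustrative only, and names such as Successor and backupBelief do not exist in Storm.

// Standalone sketch (not Storm code) of one value-iteration backup for expected rewards:
//   V(b) = opt_a [ r(b,a) + sum_z P(z | b,a) * sum_j lambda_j * V(v_j) ]
// where the successor belief after (a,z) is approximated by grid beliefs v_j with
// convex weights lambda_j (Lovejoy-style triangulation).
#include <cstddef>
#include <cstdint>
#include <limits>
#include <map>
#include <utility>
#include <vector>

struct Successor {
    double probability;             // P(z | b, a)
    std::vector<uint64_t> gridIds;  // grid beliefs v_j spanning the sub-simplex of the successor belief
    std::vector<double> lambdas;    // convex weights lambda_j (summing to 1)
};

// Returns the backed-up value of one belief and the action achieving it.
std::pair<double, uint64_t> backupBelief(std::vector<double> const& actionRewards,              // r(b, a) per action
                                         std::vector<std::vector<Successor>> const& successors, // successor data per action
                                         std::map<uint64_t, double> const& gridValues,          // V(v_j) from the previous iteration
                                         bool minimize) {
    double chosenValue = minimize ? std::numeric_limits<double>::infinity() : -std::numeric_limits<double>::infinity();
    uint64_t chosenAction = 0;
    for (uint64_t action = 0; action < actionRewards.size(); ++action) {
        double value = actionRewards[action];  // expected immediate reward collected in the belief
        for (auto const& succ : successors[action]) {
            double interpolated = 0.0;
            for (std::size_t j = 0; j < succ.lambdas.size(); ++j) {
                interpolated += succ.lambdas[j] * gridValues.at(succ.gridIds[j]);
            }
            value += succ.probability * interpolated;  // weight by the observation probability
        }
        if ((minimize && value < chosenValue) || (!minimize && value > chosenValue)) {
            chosenValue = value;
            chosenAction = action;
        }
    }
    return {chosenValue, chosenAction};
}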