From 4c8395c3b1e4db3de56974b96f425443f05a2ddd Mon Sep 17 00:00:00 2001
From: Alexander Bork
Date: Fri, 18 Oct 2019 11:25:03 +0200
Subject: [PATCH] Speedup of probability approximation

---
 .../ApproximatePOMDPModelchecker.cpp          | 67 ++++++++++---------
 .../ApproximatePOMDPModelchecker.h            |  9 ++-
 2 files changed, 42 insertions(+), 34 deletions(-)

diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 69e9bea80..e0b12229a 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -18,14 +18,13 @@ namespace storm {
     namespace modelchecker {
         template<typename ValueType, typename RewardModelType>
         ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker() {
-            //Intentionally left empty
+            cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(0.00000000001), false);
         }
 
         template<typename ValueType, typename RewardModelType>
         std::unique_ptr<POMDPCheckResult<ValueType>>
-        ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbability(
-                storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                std::set<uint32_t> targetObservations, bool min, uint64_t gridResolution) {
+        ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                                                                 std::set<uint32_t> targetObservations, bool min, uint64_t gridResolution) {
             storm::utility::Stopwatch beliefGridTimer(true);
             uint64_t maxIterations = 100;
             bool finished = false;
@@ -77,6 +76,9 @@
                 uint64_t numChoices = pomdp.getNumberOfChoices(
                         pomdp.getStatesWithObservation(currentBelief.observation).front());
 
+                std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
+                std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
+
                 for (uint64_t action = 0; action < numChoices; ++action) {
                     std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
                             pomdp, currentBelief, action);
@@ -94,8 +96,8 @@
                                                                                    nextId);
                         nextId = beliefList.size();
                     }
-                    observationProbabilitiesInAction.push_back(actionObservationProbabilities);
-                    nextBelievesInAction.push_back(actionObservationBelieves);
+                    observationProbabilitiesInAction[action] = actionObservationProbabilities;
+                    nextBelievesInAction[action] = actionObservationBelieves;
                 }
                 observationProbabilities.emplace(
                         std::make_pair(currentBelief.id, observationProbabilitiesInAction));
@@ -111,7 +113,6 @@
             STORM_PRINT("Nr Believes " << beliefList.size() << std::endl)
             STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl)
             // Value Iteration
-            storm::utility::ConstantsComparator<ValueType> cc(storm::utility::convertNumber<ValueType>(0.00000000001), false);
             while (!finished && iteration < maxIterations) {
                 storm::utility::Stopwatch iterationTimer(true);
                 STORM_LOG_DEBUG("Iteration " << iteration + 1);
@@ -171,10 +172,8 @@
                     result[currentBelief.id] = chosenValue;
                     chosenActions[currentBelief.id] = chosenActionIndex;
                     // Check if the iteration brought an improvement
-                    if ((min && cc.isLess(storm::utility::zero<ValueType>(),
-                                          result_backup[currentBelief.id] - result[currentBelief.id])) ||
-                        (!min && cc.isLess(storm::utility::zero<ValueType>(),
-                                           result[currentBelief.id] - result_backup[currentBelief.id]))) {
+                    if ((min && cc.isLess(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id])) ||
+                        (!min && cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id]))) {
                         improvement = true;
                     }
                 }
@@ -182,11 +181,8 @@
                 finished = !improvement;
                 storm::utility::Stopwatch backupTimer(true);
                 // back up
-                for (auto iter = result.begin(); iter != result.end(); ++iter) {
-                    result_backup[iter->first] = result[iter->first];
-                }
-                backupTimer.stop();
-                STORM_PRINT("Time Backup " << backupTimer << std::endl);
+                result_backup = result;
+
                 ++iteration;
                 iterationTimer.stop();
                 STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl);
@@ -214,7 +210,7 @@
             overApproxTimer.stop();
 
             // Now onto the under-approximation
-            bool useMdp = false;//true;
+            bool useMdp = /*false;*/true;
             storm::utility::Stopwatch underApproxTimer(true);
             ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations,
                                                                               observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min) :
@@ -497,7 +493,6 @@
                 std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                 std::map<uint64_t, ValueType> &result,
                 uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) {
-            storm::utility::ConstantsComparator<ValueType> cc(storm::utility::convertNumber<ValueType>(0.00000000001), false);
             storm::pomdp::Belief<ValueType> currentBelief = beliefList[currentBeliefId];
 
             //TODO put this in extra function
@@ -571,9 +566,9 @@
 
         template<typename ValueType, typename RewardModelType>
         uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefIdInVector(
-                std::vector<storm::pomdp::Belief<ValueType>> &grid, uint32_t observation,
+                std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
                 std::vector<ValueType> probabilities) {
-            storm::utility::ConstantsComparator<ValueType> cc(storm::utility::convertNumber<ValueType>(0.00000000001), false);
+            // TODO This one is quite slow
             for (auto const &belief : grid) {
                 if (belief.observation == observation) {
                     bool same = true;
@@ -757,8 +752,7 @@
                 uint64_t actionIndex) {
             std::map<uint32_t, ValueType> res;
             // the id is not important here as we immediately discard the belief (very hacky, I don't like it either)
-            std::vector<ValueType> postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex,
-                                                                            0).probabilities;
+            std::vector<ValueType> postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex, 0).probabilities;
            for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
                 uint32_t observation = pomdp.getObservation(state);
                 if (postProbabilities[state] != storm::utility::zero<ValueType>()) {
@@ -782,8 +776,7 @@
             uint32_t observation = 0;
             for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
                 if (belief.probabilities[state] != storm::utility::zero<ValueType>()) {
-                    auto row = pomdp.getTransitionMatrix().getRow(
-                            pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
+                    auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
                     for (auto const &entry : row) {
                         observation = pomdp.getObservation(entry.getColumn());
                         distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
@@ -795,15 +788,14 @@
 
         template<typename ValueType, typename RewardModelType>
         uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterActionAndObservation(
-                storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                std::vector<storm::pomdp::Belief<ValueType>> &beliefList, std::vector<bool> &beliefIsTarget,
-                std::set<uint32_t> &targetObservations, storm::pomdp::Belief<ValueType> belief,
-                uint64_t actionIndex, uint32_t observation, uint64_t id) {
-            std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
+                storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                std::vector<bool> &beliefIsTarget, std::set<uint32_t> &targetObservations, storm::pomdp::Belief<ValueType> belief, uint64_t actionIndex, uint32_t observation,
+                uint64_t id) {
+            storm::utility::Stopwatch distrWatch(true);
+            std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates()); //, storm::utility::zero<ValueType>());
             for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
                 if (belief.probabilities[state] != storm::utility::zero<ValueType>()) {
-                    auto row = pomdp.getTransitionMatrix().getRow(
-                            pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
+                    auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
                     for (auto const &entry : row) {
                         if (pomdp.getObservation(entry.getColumn()) == observation) {
                             distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
@@ -811,19 +803,30 @@
                     }
                 }
             }
+            distrWatch.stop();
             // We have to normalize the distribution
+            storm::utility::Stopwatch normalizationWatch(true);
             auto sum = storm::utility::zero<ValueType>();
             for (ValueType const &entry : distributionAfter) {
                 sum += entry;
             }
+
             for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) {
                 distributionAfter[i] /= sum;
             }
+            normalizationWatch.stop();
             if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) {
-                return getBeliefIdInVector(beliefList, observation, distributionAfter);
+                storm::utility::Stopwatch getWatch(true);
+                auto res = getBeliefIdInVector(beliefList, observation, distributionAfter);
+                getWatch.stop();
+                //STORM_PRINT("Distribution: "<< distrWatch.getTimeInNanoseconds() << " / Normalization: " << normalizationWatch.getTimeInNanoseconds() << " / getId: " << getWatch.getTimeInNanoseconds() << std::endl)
+                return res;
             } else {
+                storm::utility::Stopwatch pushWatch(true);
                 beliefList.push_back(storm::pomdp::Belief<ValueType>{id, observation, distributionAfter});
                 beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
+                pushWatch.stop();
+                //STORM_PRINT("Distribution: "<< distrWatch.getTimeInNanoseconds() << " / Normalization: " << normalizationWatch.getTimeInNanoseconds() << " / generateBelief: " << pushWatch.getTimeInNanoseconds() << std::endl)
                 return id;
             }
         }
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index cbde135dc..b8ad0fa66 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -27,7 +27,8 @@ namespace storm {
 
             std::unique_ptr<POMDPCheckResult<ValueType>>
             computeReachabilityReward(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                      std::set<uint32_t> target_observations, uint64_t gridResolution);
+                                      std::set<uint32_t> targetObservations, bool min,
+                                      uint64_t gridResolution);
 
         private:
             /**
@@ -173,7 +174,7 @@
             * @param probabilities
             * @return
             */
-            uint64_t getBeliefIdInVector(std::vector<storm::pomdp::Belief<ValueType>> &grid, uint32_t observation,
+            uint64_t getBeliefIdInVector(std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
                                          std::vector<ValueType> probabilities);
 
             storm::storage::SparseMatrix<ValueType>
@@ -181,6 +182,10 @@
 
             storm::storage::SparseMatrix<ValueType>
             buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions);
+
+            ValueType getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t action, storm::pomdp::Belief<ValueType> belief);
+
+            storm::utility::ConstantsComparator<ValueType> cc;
 
         };
     }