From 21e417bdaca04eae0186a1282f4e17cb83fa6e53 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 8 Nov 2019 10:55:13 +0100 Subject: [PATCH] Added on-the-fly belief grid generation to avoid computations for unreachable beliefs --- src/storm-pomdp-cli/storm-pomdp.cpp | 14 +- .../ApproximatePOMDPModelchecker.cpp | 251 ++++++++++++++++++ .../ApproximatePOMDPModelchecker.h | 5 + 3 files changed, 266 insertions(+), 4 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 96be00668..0af965ddd 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -200,12 +200,18 @@ int main(const int argc, const char** argv) { double overRes = storm::utility::one(); double underRes = storm::utility::zero(); std::unique_ptr> result; - result = checker.computeReachabilityProbability(*pomdp, targetObservationSet, - probFormula.getOptimalityType() == - storm::OptimizationDirection::Minimize, - pomdpSettings.getGridResolution()); + + //result = checker.refineReachabilityProbability(*pomdp, targetObservationSet,probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, pomdpSettings.getGridResolution(),1,10); + result = checker.computeReachabilityProbabilityOTF(*pomdp, targetObservationSet, probFormula.getOptimalityType() == storm::OptimizationDirection::Minimize, + pomdpSettings.getGridResolution()); overRes = result->OverapproximationValue; underRes = result->UnderapproximationValue; + if (overRes != underRes) { + STORM_PRINT("Overapproximation Result: " << overRes << std::endl) + STORM_PRINT("Underapproximation Result: " << underRes << std::endl) + } else { + STORM_PRINT("Result: " << overRes << std::endl) + } } } else if (formula->isRewardOperatorFormula()) { if (pomdpSettings.isSelfloopReductionSet() && storm::solver::minimize(formula->asRewardOperatorFormula().getOptimalityType())) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 14428c5bf..8d0650c54 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -42,6 +42,257 @@ namespace storm { return res; } + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { + STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) + + uint64_t maxIterations = 100; + bool finished = false; + uint64_t iteration = 0; + std::vector> beliefList; + std::vector beliefIsTarget; + std::vector> beliefGrid; + std::map result; + std::map result_backup; + //Use caching to avoid multiple computation of the subsimplices and lambdas + std::map>> subSimplexCache; + std::map> lambdaCache; + std::map chosenActions; + + std::deque beliefsToBeExpanded; + + // Belief ID -> Observation -> Probability + std::map>> observationProbabilities; + // current ID -> action -> next ID + std::map>> nextBelieves; + + uint64_t nextId = 0; + storm::utility::Stopwatch expansionTimer(true); + // Initial belief always has ID 0 + storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); + ++nextId; + beliefList.push_back(initialBelief); + beliefIsTarget.push_back( + targetObservations.find(initialBelief.observation) != targetObservations.end()); + + // for the initial belief, add the triangulated initial states + std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( + initialBelief.probabilities, gridResolution); + std::vector> initSubSimplex = initTemp.first; + std::vector initLambdas = initTemp.second; + subSimplexCache[0] = initSubSimplex; + lambdaCache[0] = initLambdas; + bool initInserted = false; + + + for (size_t j = 0; j < initLambdas.size(); ++j) { + if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { + uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]); + if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) { + if (searchResult == 0) { + // the initial belief is on the grid itself + initInserted = true; + beliefGrid.push_back(initialBelief); + beliefsToBeExpanded.push_back(0); + } else { + // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list + storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; + beliefList.push_back(gridBelief); + beliefGrid.push_back(gridBelief); + beliefIsTarget.push_back( + targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefsToBeExpanded.push_back(nextId); + ++nextId; + } + } + } + } + + + //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting + + // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points + while (!beliefsToBeExpanded.empty()) { + uint64_t currId = beliefsToBeExpanded.front(); + beliefsToBeExpanded.pop_front(); + bool isTarget = beliefIsTarget[currId]; + if (isTarget) { + result.emplace(std::make_pair(currId, storm::utility::one())); + result_backup.emplace(std::make_pair(currId, storm::utility::one())); + } else { + result.emplace(std::make_pair(currId, storm::utility::zero())); + result_backup.emplace(std::make_pair(currId, storm::utility::zero())); + + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(beliefList[currId].observation).front()); + std::vector> observationProbabilitiesInAction(numChoices); + std::vector> nextBelievesInAction(numChoices); + + for (uint64_t action = 0; action < numChoices; ++action) { + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( + pomdp, beliefList[currId], action); + std::map actionObservationBelieves; + for (auto iter = actionObservationProbabilities.begin(); + iter != actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + // THIS CALL IS SLOW + // TODO speed this up + uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, + observation, nextId); + nextId = beliefList.size(); + actionObservationBelieves[observation] = idNextBelief; + //Triangulate here and put the possibly resulting belief in the grid + std::vector> subSimplex; + std::vector lambdas; + if (subSimplexCache.count(idNextBelief) > 0) { + // TODO is this necessary here? Think later + subSimplex = subSimplexCache[idNextBelief]; + lambdas = lambdaCache[idNextBelief]; + } else { + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + beliefList[idNextBelief].probabilities, gridResolution); + subSimplex = temp.first; + lambdas = temp.second; + subSimplexCache[idNextBelief] = subSimplex; + lambdaCache[idNextBelief] = lambdas; + } + + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + if (getBeliefIdInVector(beliefGrid, observation, subSimplex[j]) == uint64_t(-1)) { + // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list + storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; + beliefList.push_back(gridBelief); + beliefGrid.push_back(gridBelief); + beliefIsTarget.push_back( + targetObservations.find(observation) != targetObservations.end()); + beliefsToBeExpanded.push_back(nextId); + ++nextId; + } + } + } + } + observationProbabilitiesInAction[action] = actionObservationProbabilities; + nextBelievesInAction[action] = actionObservationBelieves; + } + observationProbabilities.emplace( + std::make_pair(currId, observationProbabilitiesInAction)); + nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); + } + } + expansionTimer.stop(); + STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl) + STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) + STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) + + storm::utility::Stopwatch overApproxTimer(true); + // Value Iteration + while (!finished && iteration < maxIterations) { + storm::utility::Stopwatch iterationTimer(true); + STORM_LOG_DEBUG("Iteration " << iteration + 1); + bool improvement = false; + for (size_t i = 0; i < beliefGrid.size(); ++i) { + storm::pomdp::Belief currentBelief = beliefGrid[i]; + bool isTarget = beliefIsTarget[currentBelief.id]; + if (!isTarget) { + // we can take any state with the observation as they have the same number of choices + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(currentBelief.observation).front()); + // Initialize the values for the value iteration + ValueType chosenValue = min ? storm::utility::infinity() + : -storm::utility::infinity(); + uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType currentValue; + + for (uint64_t action = 0; action < numChoices; ++action) { + currentValue = storm::utility::zero(); + for (auto iter = observationProbabilities[currentBelief.id][action].begin(); + iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { + uint32_t observation = iter->first; + storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; + // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief + // cache the values to not always re-calculate + std::vector> subSimplex; + std::vector lambdas; + if (subSimplexCache.count(nextBelief.id) > 0) { + subSimplex = subSimplexCache[nextBelief.id]; + lambdas = lambdaCache[nextBelief.id]; + } else { + // TODO is this necessary here? Everything should have already been computed + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + nextBelief.probabilities, gridResolution); + subSimplex = temp.first; + lambdas = temp.second; + subSimplexCache[nextBelief.id] = subSimplex; + lambdaCache[nextBelief.id] = lambdas; + } + auto sum = storm::utility::zero(); + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + sum += lambdas[j] * result_backup.at( + getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + } + } + currentValue += iter->second * sum; + } + // Update the selected actions + if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || + (!min && + cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { + chosenValue = currentValue; + chosenActionIndex = action; + } + } + result[currentBelief.id] = chosenValue; + chosenActions[currentBelief.id] = chosenActionIndex; + // Check if the iteration brought an improvement + if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { + improvement = true; + } + } + } + finished = !improvement; + // back up + result_backup = result; + + ++iteration; + iterationTimer.stop(); + STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); + } + + STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); + + auto overApprox = storm::utility::zero(); + for (size_t j = 0; j < initLambdas.size(); ++j) { + if (initLambdas[j] != storm::utility::zero()) { + overApprox += initLambdas[j] * + result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, + initSubSimplex[j])]; + } + } + overApproxTimer.stop(); + + storm::utility::Stopwatch underApproxTimer(true); + ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, false); + underApproxTimer.stop(); + + STORM_PRINT("Time Overapproximation: " << overApproxTimer + << std::endl + << "Time Underapproximation: " << underApproxTimer + << std::endl); + + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + + return std::make_unique>( + POMDPCheckResult{overApprox, underApprox}); + + } + template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 4792f9127..5399f079d 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -25,6 +25,11 @@ namespace storm { std::set const &targetObservations, bool min, uint64_t startingResolution, uint64_t stepSize, uint64_t maxNrOfRefinements); + std::unique_ptr> + computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, + uint64_t gridResolution); + std::unique_ptr> computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min,