From b9c0af6628af7120634f3744e18dead0a40a7f8c Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 8 Nov 2019 11:43:09 +0100 Subject: [PATCH] Added on-the-fly belief grid generation for rewards --- .../ApproximatePOMDPModelchecker.cpp | 262 ++++++++++++++++++ .../ApproximatePOMDPModelchecker.h | 4 + 2 files changed, 266 insertions(+) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 8d0650c54..a32d7dcc0 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -293,6 +293,268 @@ namespace storm { } + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { + STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) + + RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); + + uint64_t maxIterations = 100; + bool finished = false; + uint64_t iteration = 0; + std::vector> beliefList; + std::vector beliefIsTarget; + std::vector> beliefGrid; + std::map result; + std::map result_backup; + //Use caching to avoid multiple computation of the subsimplices and lambdas + std::map>> subSimplexCache; + std::map> lambdaCache; + std::map chosenActions; + + std::deque beliefsToBeExpanded; + + // Belief ID -> Observation -> Probability + std::map>> observationProbabilities; + // current ID -> action -> next ID + std::map>> nextBelieves; + // current ID -> action -> reward + std::map> beliefActionRewards; + + uint64_t nextId = 0; + storm::utility::Stopwatch expansionTimer(true); + // Initial belief always has ID 0 + storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); + ++nextId; + beliefList.push_back(initialBelief); + beliefIsTarget.push_back( + targetObservations.find(initialBelief.observation) != targetObservations.end()); + + // for the initial belief, add the triangulated initial states + std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( + initialBelief.probabilities, gridResolution); + std::vector> initSubSimplex = initTemp.first; + std::vector initLambdas = initTemp.second; + subSimplexCache[0] = initSubSimplex; + lambdaCache[0] = initLambdas; + bool initInserted = false; + + + for (size_t j = 0; j < initLambdas.size(); ++j) { + if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { + uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]); + if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) { + if (searchResult == 0) { + // the initial belief is on the grid itself + initInserted = true; + beliefGrid.push_back(initialBelief); + beliefsToBeExpanded.push_back(0); + } else { + // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list + storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; + beliefList.push_back(gridBelief); + beliefGrid.push_back(gridBelief); + beliefIsTarget.push_back( + targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefsToBeExpanded.push_back(nextId); + ++nextId; + } + } + } + } + + + //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting + + // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points + while (!beliefsToBeExpanded.empty()) { + uint64_t currId = beliefsToBeExpanded.front(); + beliefsToBeExpanded.pop_front(); + bool isTarget = beliefIsTarget[currId]; + + result.emplace(std::make_pair(currId, storm::utility::zero())); + result_backup.emplace(std::make_pair(currId, storm::utility::zero())); + if (!isTarget) { + + uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); + uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); + std::vector> observationProbabilitiesInAction(numChoices); + std::vector> nextBelievesInAction(numChoices); + std::vector actionRewardsInState(numChoices); + + for (uint64_t action = 0; action < numChoices; ++action) { + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); + std::map actionObservationBelieves; + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + // THIS CALL IS SLOW + // TODO speed this up + uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, + observation, nextId); + nextId = beliefList.size(); + actionObservationBelieves[observation] = idNextBelief; + //Triangulate here and put the possibly resulting belief in the grid + std::vector> subSimplex; + std::vector lambdas; + if (subSimplexCache.count(idNextBelief) > 0) { + // TODO is this necessary here? Think later + subSimplex = subSimplexCache[idNextBelief]; + lambdas = lambdaCache[idNextBelief]; + } else { + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + beliefList[idNextBelief].probabilities, gridResolution); + subSimplex = temp.first; + lambdas = temp.second; + subSimplexCache[idNextBelief] = subSimplex; + lambdaCache[idNextBelief] = lambdas; + } + + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + if (getBeliefIdInVector(beliefGrid, observation, subSimplex[j]) == uint64_t(-1)) { + // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list + storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; + beliefList.push_back(gridBelief); + beliefGrid.push_back(gridBelief); + beliefIsTarget.push_back( + targetObservations.find(observation) != targetObservations.end()); + beliefsToBeExpanded.push_back(nextId); + ++nextId; + } + } + } + } + observationProbabilitiesInAction[action] = actionObservationProbabilities; + nextBelievesInAction[action] = actionObservationBelieves; + actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + beliefList[currId]); + } + observationProbabilities.emplace( + std::make_pair(currId, observationProbabilitiesInAction)); + nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); + beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); + + } + } + expansionTimer.stop(); + STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl) + STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) + STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) + + storm::utility::Stopwatch overApproxTimer(true); + // Value Iteration + while (!finished && iteration < maxIterations) { + storm::utility::Stopwatch iterationTimer(true); + STORM_LOG_DEBUG("Iteration " << iteration + 1); + bool improvement = false; + for (size_t i = 0; i < beliefGrid.size(); ++i) { + storm::pomdp::Belief currentBelief = beliefGrid[i]; + bool isTarget = beliefIsTarget[currentBelief.id]; + if (!isTarget) { + // we can take any state with the observation as they have the same number of choices + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(currentBelief.observation).front()); + // Initialize the values for the value iteration + ValueType chosenValue = min ? storm::utility::infinity() + : -storm::utility::infinity(); + uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType currentValue; + + for (uint64_t action = 0; action < numChoices; ++action) { + currentValue = beliefActionRewards[currentBelief.id][action]; + for (auto iter = observationProbabilities[currentBelief.id][action].begin(); + iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { + uint32_t observation = iter->first; + storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; + // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief + // cache the values to not always re-calculate + std::vector> subSimplex; + std::vector lambdas; + if (subSimplexCache.count(nextBelief.id) > 0) { + subSimplex = subSimplexCache[nextBelief.id]; + lambdas = lambdaCache[nextBelief.id]; + } else { + //TODO This should not ne reachable + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + nextBelief.probabilities, gridResolution); + subSimplex = temp.first; + lambdas = temp.second; + subSimplexCache[nextBelief.id] = subSimplex; + lambdaCache[nextBelief.id] = lambdas; + } + auto sum = storm::utility::zero(); + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + sum += lambdas[j] * result_backup.at( + getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + } + } + + currentValue += iter->second * sum; + } + // Update the selected actions + if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || + (!min && + cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { + + chosenValue = currentValue; + chosenActionIndex = action; + } + } + + result[currentBelief.id] = chosenValue; + + chosenActions[currentBelief.id] = chosenActionIndex; + // Check if the iteration brought an improvement + if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || + cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { + improvement = true; + } + } + } + finished = !improvement; + // back up + result_backup = result; + + ++iteration; + iterationTimer.stop(); + STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); + } + + STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); + + auto overApprox = storm::utility::zero(); + for (size_t j = 0; j < initLambdas.size(); ++j) { + if (initLambdas[j] != storm::utility::zero()) { + overApprox += initLambdas[j] * + result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, + initSubSimplex[j])]; + } + } + overApproxTimer.stop(); + + storm::utility::Stopwatch underApproxTimer(true); + ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, true); + underApproxTimer.stop(); + + STORM_PRINT("Time Overapproximation: " << overApproxTimer + << std::endl + << "Time Underapproximation: " << underApproxTimer + << std::endl); + + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + + return std::make_unique>( + POMDPCheckResult{overApprox, underApprox}); + + } + + template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 5399f079d..1222d0b1f 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -30,6 +30,10 @@ namespace storm { std::set targetObservations, bool min, uint64_t gridResolution); + std::unique_ptr> + computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, + uint64_t gridResolution); + std::unique_ptr> computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min,