From bbd3ec7287184d4f6d79588fe8ed483e273f283e Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 22 Nov 2019 16:21:54 +0100 Subject: [PATCH 1/9] Fix of wrong MDP underapproximation --- .../ApproximatePOMDPModelchecker.cpp | 141 ++++++++++-------- .../ApproximatePOMDPModelchecker.h | 31 +++- 2 files changed, 108 insertions(+), 64 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index a32d7dcc0..ff9bb522b 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -11,6 +11,7 @@ #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" #include "storm/api/properties.h" +#include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" namespace storm { @@ -18,7 +19,9 @@ namespace storm { namespace modelchecker { template ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker() { - cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(0.00000000001), false); + precision = 0.000000001; + cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(precision), false); + useMdp = false; } template @@ -59,7 +62,8 @@ namespace storm { //Use caching to avoid multiple computation of the subsimplices and lambdas std::map>> subSimplexCache; std::map> lambdaCache; - std::map chosenActions; + std::map> chosenActions; + std::deque beliefsToBeExpanded; @@ -101,8 +105,7 @@ namespace storm { storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); ++nextId; } @@ -125,14 +128,12 @@ namespace storm { result.emplace(std::make_pair(currId, storm::utility::zero())); result_backup.emplace(std::make_pair(currId, storm::utility::zero())); - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(beliefList[currId].observation).front()); + uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front()); std::vector> observationProbabilitiesInAction(numChoices); std::vector> nextBelievesInAction(numChoices); for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, beliefList[currId], action); + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); std::map actionObservationBelieves; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { @@ -166,8 +167,7 @@ namespace storm { storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back( - targetObservations.find(observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); ++nextId; } @@ -201,9 +201,8 @@ namespace storm { uint64_t numChoices = pomdp.getNumberOfChoices( 
pomdp.getStatesWithObservation(currentBelief.observation).front()); // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); + std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { @@ -231,8 +230,7 @@ namespace storm { auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); } } currentValue += iter->second * sum; @@ -243,11 +241,14 @@ namespace storm { cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; - chosenActionIndex = action; + if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { + chosenActionIndices.clear(); + } + chosenActionIndices.push_back(action); } } result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndex; + chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { improvement = true; @@ -312,7 +313,7 @@ namespace storm { //Use caching to avoid multiple computation of the subsimplices and lambdas std::map>> subSimplexCache; std::map> lambdaCache; - std::map chosenActions; + std::map> chosenActions; std::deque beliefsToBeExpanded; @@ -459,7 +460,7 @@ namespace storm { // Initialize the values for the value iteration ValueType chosenValue = min ? 
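The change above replaces the single chosenActionIndex with a vector that, in MDP mode, keeps every action whose value ties with the optimum up to the comparator's precision. A minimal standalone sketch of that selection pattern (STL only; the function name and kPrecision are illustrative, and double stands in for ValueType):

    #include <cmath>
    #include <cstdint>
    #include <limits>
    #include <vector>

    // Return the indices of all actions whose value ties with the optimum,
    // clearing previously collected ties whenever a strictly better value appears.
    std::vector<uint64_t> collectOptimalActions(std::vector<double> const& actionValues,
                                                bool min, double kPrecision = 1e-9) {
        double chosenValue = min ? std::numeric_limits<double>::infinity()
                                 : -std::numeric_limits<double>::infinity();
        std::vector<uint64_t> chosenActionIndices;
        for (uint64_t action = 0; action < actionValues.size(); ++action) {
            double const currentValue = actionValues[action];
            bool const tie = std::fabs(chosenValue - currentValue) <= kPrecision;
            bool const strictlyBetter = min ? (chosenValue - currentValue > kPrecision)
                                            : (currentValue - chosenValue > kPrecision);
            if (strictlyBetter || tie) {
                chosenValue = currentValue;
                if (!tie) {
                    chosenActionIndices.clear();
                }
                chosenActionIndices.push_back(action);
            }
        }
        return chosenActionIndices;
    }

With useMdp disabled the patched code clears on ties as well, so only the most recently found optimum survives, matching the old single-action behaviour.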
storm::utility::infinity() : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); + std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { @@ -499,15 +500,17 @@ namespace storm { (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - chosenActionIndex = action; + if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { + chosenActionIndices.clear(); + } + chosenActionIndices.push_back(action); } } result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndex; + chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { @@ -586,7 +589,7 @@ namespace storm { std::map result; std::map result_backup; // Belief ID -> ActionIndex - std::map chosenActions; + std::map> chosenActions; // Belief ID -> Observation -> Probability std::map>> observationProbabilities; @@ -611,11 +614,9 @@ namespace storm { std::vector> nextBelievesInAction(numChoices); for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, currentBelief, action); + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action); std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up @@ -659,7 +660,7 @@ namespace storm { // Initialize the values for the value iteration ValueType chosenValue = min ? 
storm::utility::infinity() : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); + std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { @@ -698,11 +699,14 @@ namespace storm { cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; - chosenActionIndex = action; + if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { + chosenActionIndices.clear(); + } + chosenActionIndices.push_back(action); } } result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndex; + chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { improvement = true; @@ -721,8 +725,7 @@ namespace storm { STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); beliefGrid.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); std::pair>, std::vector> temp = computeSubSimplexAndLambdas( initialBelief.probabilities, gridResolution); @@ -740,7 +743,6 @@ namespace storm { overApproxTimer.stop(); // Now onto the under-approximation - bool useMdp = true; storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min, false) : @@ -795,7 +797,7 @@ namespace storm { std::map result; std::map result_backup; // Belief ID -> ActionIndex - std::map chosenActions; + std::map> chosenActions; // Belief ID -> Observation -> Probability std::map>> observationProbabilities; @@ -874,7 +876,7 @@ namespace storm { // Initialize the values for the value iteration ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); + std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { @@ -913,15 +915,17 @@ namespace storm { (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - chosenActionIndex = action; + if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { + chosenActionIndices.clear(); + } + chosenActionIndices.push_back(action); } } result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndex; + chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { @@ -960,7 +964,6 @@ namespace storm { overApproxTimer.stop(); // Now onto the under-approximation - bool useMdp = true; storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = useMdp ? 
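Off-grid beliefs, such as the initial belief here, are evaluated by triangulating them into grid vertices with convex weights (the subSimplex/lambdas pair) and interpolating, which is what the initLambdas loop computes. The underlying operation in isolation, with double standing in for ValueType:

    #include <cstddef>
    #include <vector>

    // V(b) = sum_j lambda_j * V(v_j), where the lambda_j are the convex
    // coefficients of b over its sub-simplex vertices v_j and sum to one.
    double interpolateValue(std::vector<double> const& lambdas,
                            std::vector<double> const& vertexValues) {
        double value = 0.0;
        for (std::size_t j = 0; j < lambdas.size(); ++j) {
            if (lambdas[j] != 0.0) {  // vertices with weight zero contribute nothing
                value += lambdas[j] * vertexValues[j];
            }
        }
        return value;
    }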
computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min, true) : @@ -989,7 +992,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, - std::map chosenActions, + std::map> chosenActions, uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward) { std::set visitedBelieves; @@ -1017,17 +1020,15 @@ namespace storm { } else { if (chosenActions.find(currentBeliefId) == chosenActions.end()) { // If the current Belief is not part of the grid, we have not computed the action to choose yet - chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget, - targetObservations, + chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, gridResolution, currentBeliefId, beliefList.size(), min); } - for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].begin(); - iter != - observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].end(); ++iter) { + for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].begin(); + iter != observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].end(); ++iter) { uint32_t observation = iter->first; - uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId]][observation]; + uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId][0]][observation]; if (visitedBelieves.insert(nextBeliefId).second) { beliefStateMap[nextBeliefId] = stateId; ++stateId; @@ -1049,12 +1050,14 @@ namespace storm { } storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - // Add the reward collected by taking the chosen Action in the belief - rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( - storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first])), - currentBelief)); + if (computeReward) { + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + // Add the reward collected by taking the chosen Action in the belief + rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( + storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])), + currentBelief)); + } } std::unordered_map rewardModels = {{"std", rewardModel}}; @@ -1092,7 +1095,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, - std::map chosenActions, + std::map> chosenActions, uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards) { std::set visitedBelieves; @@ -1152,8 +1155,7 @@ namespace storm { std::map transitionsInStateWithAction; for (auto iter = observationProbabilities[currentBeliefId][action].begin(); - iter != - observationProbabilities[currentBeliefId][action].end(); ++iter) { + iter != observationProbabilities[currentBeliefId][action].end(); ++iter) { uint32_t observation = iter->first; uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation]; if 
(visitedBelieves.insert(nextBeliefId).second) { @@ -1270,7 +1272,7 @@ namespace storm { } template - uint64_t ApproximatePOMDPModelchecker::extractBestAction( + std::vector ApproximatePOMDPModelchecker::extractBestActions( storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, @@ -1312,7 +1314,7 @@ namespace storm { // choose the action which results in the value computed by the over-approximation ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); + std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { @@ -1330,8 +1332,7 @@ namespace storm { auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result.at( - getBeliefIdInVector(beliefList, observation, subSimplex[j])); + sum += lambdas[j] * result.at(getBeliefIdInVector(beliefList, observation, subSimplex[j])); } } currentValue += iter->second * sum; @@ -1343,10 +1344,28 @@ namespace storm { cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; - chosenActionIndex = action; + if (!cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { + chosenActionIndices.clear(); + } + chosenActionIndices.push_back(action); } } - return chosenActionIndex; + return chosenActionIndices; + } + + template + std::vector ApproximatePOMDPModelchecker::extractBestAction( + storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) { + return std::vector{ + extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, gridResolution, currentBeliefId, + nextId, min).front()}; } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 1222d0b1f..f5e7ff805 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -58,7 +58,30 @@ namespace storm { * @param min * @return */ - uint64_t extractBestAction(storm::models::sparse::Pomdp const &pomdp, + std::vector extractBestActions(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &target_observations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, + bool min); + + /** + * TODO + * @param pomdp + * @param beliefList + * @param observationProbabilities + * @param nextBelieves + * @param result + * @param gridResolution + * @param currentBeliefId + * @param nextId + * @param min + * @return + */ + std::vector extractBestAction(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, std::set &target_observations, @@ -90,7 +113,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, - std::map chosenActions, + std::map> chosenActions, uint64_t gridResolution, uint64_t initialBeliefId, 
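Both under-approximation builders explore only beliefs reachable under the chosen actions and hand out model-state ids on first visit; extractBestAction then survives as a thin wrapper returning the front of extractBestActions. The exploration skeleton, reduced to STL types (successorsOf is a hypothetical stand-in for the observationProbabilities/nextBelieves lookups):

    #include <cstdint>
    #include <deque>
    #include <functional>
    #include <map>
    #include <set>
    #include <vector>

    std::map<uint64_t, uint64_t> exploreReachableBeliefs(
            uint64_t initialBeliefId,
            std::function<std::vector<uint64_t>(uint64_t)> const& successorsOf) {
        std::map<uint64_t, uint64_t> beliefStateMap;  // belief id -> model state id
        std::set<uint64_t> visited{initialBeliefId};
        std::deque<uint64_t> toExplore{initialBeliefId};
        uint64_t stateId = 0;
        beliefStateMap[initialBeliefId] = stateId++;
        while (!toExplore.empty()) {
            uint64_t const current = toExplore.front();
            toExplore.pop_front();
            for (uint64_t next : successorsOf(current)) {
                if (visited.insert(next).second) {  // first visit: assign a fresh state
                    beliefStateMap[next] = stateId++;
                    toExplore.push_back(next);
                }
            }
        }
        return beliefStateMap;
    }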
bool min, bool computeReward); ValueType computeUnderapproximationWithMDP(storm::models::sparse::Pomdp const &pomdp, @@ -100,7 +123,7 @@ namespace storm { std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, - std::map chosenActions, + std::map> chosenActions, uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards); /** @@ -200,6 +223,8 @@ namespace storm { ValueType getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, storm::pomdp::Belief belief); storm::utility::ConstantsComparator cc; + double precision; + bool useMdp; }; } From c663edbd853f96f05191f5d83204af6e2723b079 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 22 Nov 2019 16:29:29 +0100 Subject: [PATCH 2/9] Added generation of an MDP for the over-approximation in the on-the-fly state exploration --- .../ApproximatePOMDPModelchecker.cpp | 96 ++++++++++++++++--- 1 file changed, 82 insertions(+), 14 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index ff9bb522b..c1ce9449e 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -81,6 +81,15 @@ namespace storm { beliefIsTarget.push_back( targetObservations.find(initialBelief.observation) != targetObservations.end()); + // These are the components to build the MDP from the grid + std::map beliefStateMap; + std::vector>> mdpTransitions; + std::vector targetStates; + uint64_t mdpStateId = 0; + + beliefStateMap[initialBelief.id] = mdpStateId; + ++mdpStateId; + // for the initial belief, add the triangulated initial states std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( initialBelief.probabilities, gridResolution); @@ -90,6 +99,8 @@ namespace storm { lambdaCache[0] = initLambdas; bool initInserted = false; + std::vector> initTransitionsInBelief; + std::map initTransitionInActionBelief; for (size_t j = 0; j < initLambdas.size(); ++j) { if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { @@ -107,12 +118,21 @@ namespace storm { beliefGrid.push_back(gridBelief); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); + + beliefStateMap[nextId] = mdpStateId; + initTransitionInActionBelief[mdpStateId] = initLambdas[j]; ++nextId; + ++mdpStateId; } } } } + // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs + if (!initTransitionInActionBelief.empty()) { + initTransitionsInBelief.push_back(initTransitionInActionBelief); + mdpTransitions.push_back(initTransitionsInBelief); + } //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. 
Should do nothing special if belief is on grid, otherwise it gets interesting @@ -124,6 +144,14 @@ namespace storm { if (isTarget) { result.emplace(std::make_pair(currId, storm::utility::one())); result_backup.emplace(std::make_pair(currId, storm::utility::one())); + + // MDP stuff + std::vector> transitionsInBelief; + targetStates.push_back(beliefStateMap[currId]); + std::map transitionInActionBelief; + transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); + transitionsInBelief.push_back(transitionInActionBelief); + mdpTransitions.push_back(transitionsInBelief); } else { result.emplace(std::make_pair(currId, storm::utility::zero())); result_backup.emplace(std::make_pair(currId, storm::utility::zero())); @@ -131,12 +159,14 @@ namespace storm { uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front()); std::vector> observationProbabilitiesInAction(numChoices); std::vector> nextBelievesInAction(numChoices); + std::vector> transitionsInBelief; for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { + std::map transitionInActionBelief; + + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up @@ -159,7 +189,6 @@ namespace storm { subSimplexCache[idNextBelief] = subSimplex; lambdaCache[idNextBelief] = lambdas; } - for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { if (getBeliefIdInVector(beliefGrid, observation, subSimplex[j]) == uint64_t(-1)) { @@ -169,17 +198,31 @@ namespace storm { beliefGrid.push_back(gridBelief); beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); + + beliefStateMap[nextId] = mdpStateId; + transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; ++nextId; + ++mdpStateId; + } else { + transitionInActionBelief[beliefStateMap[getBeliefIdInVector(beliefGrid, observation, subSimplex[j])]] = iter->second * lambdas[j]; } } } } observationProbabilitiesInAction[action] = actionObservationProbabilities; nextBelievesInAction[action] = actionObservationBelieves; + if (!transitionInActionBelief.empty()) { + transitionsInBelief.push_back(transitionInActionBelief); + } } - observationProbabilities.emplace( - std::make_pair(currId, observationProbabilitiesInAction)); + observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); + if (transitionsInBelief.empty()) { + std::map transitionInActionBelief; + transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); + transitionsInBelief.push_back(transitionInActionBelief); + } + mdpTransitions.push_back(transitionsInBelief); } } expansionTimer.stop(); @@ -187,6 +230,18 @@ namespace storm { STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) + storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); + mdpLabeling.addLabel("init"); + mdpLabeling.addLabel("target"); + mdpLabeling.addLabelToState("init", 
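Two invariants of the generated over-approximation MDP are established in this hunk: target beliefs become absorbing via a single self-loop action, and any belief whose actions all produced empty distributions still receives a self-loop so that every state keeps at least one enabled choice. The fallback in isolation, with simplified double-valued rows:

    #include <cstdint>
    #include <map>
    #include <vector>

    using Distribution = std::map<uint64_t, double>;  // successor state -> probability
    using StateChoices = std::vector<Distribution>;   // one distribution per action

    // Give the state a deterministic self-loop if it has no outgoing choice yet;
    // the same shape makes target states absorbing.
    void ensureSelfLoopIfDeadlocked(std::vector<StateChoices>& transitions, uint64_t state) {
        if (transitions[state].empty()) {
            transitions[state].push_back(Distribution{{state, 1.0}});
        }
    }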
0); + for (auto targetState : targetStates) { + mdpLabeling.addLabelToState("target", targetState); + } + + storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); + storm::models::sparse::Mdp overApproxMdp(modelComponents); + overApproxMdp.printModelInformationToStream(std::cout); + storm::utility::Stopwatch overApproxTimer(true); // Value Iteration while (!finished && iteration < maxIterations) { @@ -276,21 +331,34 @@ namespace storm { } overApproxTimer.stop(); - storm::utility::Stopwatch underApproxTimer(true); + ValueType underApprox = storm::utility::zero(); + /*storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min, false); - underApproxTimer.stop(); + underApproxTimer.stop();*/ - STORM_PRINT("Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl); + // STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl << "Time Underapproximation: " << underApproxTimer << std::endl); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); + auto model = std::make_shared>(overApproxMdp); + auto modelPtr = std::static_pointer_cast>(model); + + + std::vector parameterNames; + storm::api::exportSparseModelAsDrn(modelPtr, "test", parameterNames); + + std::string propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? 
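buildTransitionMatrix (not shown in this patch) has to turn the nested state -> action -> distribution maps into Storm's row-grouped sparse matrix. A sketch of that flattening with plain containers; Triplet and the function name are illustrative:

    #include <cstdint>
    #include <map>
    #include <vector>

    struct Triplet { uint64_t row; uint64_t column; double value; };

    std::vector<Triplet> flattenTransitions(
            std::vector<std::vector<std::map<uint64_t, double>>> const& transitions,
            std::vector<uint64_t>& rowGroupIndices) {  // out: first row of each state
        std::vector<Triplet> entries;
        uint64_t row = 0;
        for (auto const& choicesOfState : transitions) {
            rowGroupIndices.push_back(row);  // this state's row group starts here
            for (auto const& distribution : choicesOfState) {
                for (auto const& entry : distribution) {
                    entries.push_back({row, entry.first, entry.second});
                }
                ++row;
            }
        }
        rowGroupIndices.push_back(row);  // sentinel: one past the last row
        return entries;
    }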
[F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); + + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); + STORM_LOG_ASSERT(res, "Result not exist."); + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); + STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); + + return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); } From c6902e0ca73fe084ed68074115e561931c711503 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Mon, 25 Nov 2019 11:08:07 +0100 Subject: [PATCH 3/9] Added reward MDP generation for the overapproximation --- .../ApproximatePOMDPModelchecker.cpp | 114 +++++++++++++++--- 1 file changed, 96 insertions(+), 18 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index c1ce9449e..bbb87684e 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -78,8 +78,7 @@ namespace storm { storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); ++nextId; beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); // These are the components to build the MDP from the grid std::map beliefStateMap; @@ -344,8 +343,6 @@ namespace storm { auto model = std::make_shared>(overApproxMdp); auto modelPtr = std::static_pointer_cast>(model); - - std::vector parameterNames; storm::api::exportSparseModelAsDrn(modelPtr, "test", parameterNames); @@ -398,8 +395,16 @@ namespace storm { storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); ++nextId; beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); + + // These are the components to build the MDP from the grid + std::map beliefStateMap; + std::vector>> mdpTransitions; + std::vector targetStates; + uint64_t mdpStateId = 0; + + beliefStateMap[initialBelief.id] = mdpStateId; + ++mdpStateId; // for the initial belief, add the triangulated initial states std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( @@ -410,6 +415,8 @@ namespace storm { lambdaCache[0] = initLambdas; bool initInserted = false; + std::vector> initTransitionsInBelief; + std::map initTransitionInActionBelief; for (size_t j = 0; j < initLambdas.size(); ++j) { if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { @@ -429,11 +436,22 @@ namespace storm { targetObservations.find(initialBelief.observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); ++nextId; + + beliefStateMap[nextId] = mdpStateId; + initTransitionInActionBelief[mdpStateId] = initLambdas[j]; + ++nextId; + ++mdpStateId; } } } } + // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs + if (!initTransitionInActionBelief.empty()) { + initTransitionsInBelief.push_back(initTransitionInActionBelief); + 
mdpTransitions.push_back(initTransitionsInBelief); + } + //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting @@ -445,17 +463,26 @@ namespace storm { result.emplace(std::make_pair(currId, storm::utility::zero())); result_backup.emplace(std::make_pair(currId, storm::utility::zero())); - if (!isTarget) { - + if (isTarget) { + // MDP stuff + std::vector> transitionsInBelief; + targetStates.push_back(beliefStateMap[currId]); + std::map transitionInActionBelief; + transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); + transitionsInBelief.push_back(transitionInActionBelief); + mdpTransitions.push_back(transitionsInBelief); + } else { uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); std::vector> observationProbabilitiesInAction(numChoices); std::vector> nextBelievesInAction(numChoices); std::vector actionRewardsInState(numChoices); + std::vector> transitionsInBelief; for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); std::map actionObservationBelieves; + std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW @@ -490,7 +517,13 @@ namespace storm { beliefIsTarget.push_back( targetObservations.find(observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); + + beliefStateMap[nextId] = mdpStateId; + transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; ++nextId; + ++mdpStateId; + } else { + transitionInActionBelief[beliefStateMap[getBeliefIdInVector(beliefGrid, observation, subSimplex[j])]] = iter->second * lambdas[j]; } } } @@ -499,12 +532,20 @@ namespace storm { nextBelievesInAction[action] = actionObservationBelieves; actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), beliefList[currId]); + if (!transitionInActionBelief.empty()) { + transitionsInBelief.push_back(transitionInActionBelief); + } } - observationProbabilities.emplace( - std::make_pair(currId, observationProbabilitiesInAction)); + observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); + if (transitionsInBelief.empty()) { + std::map transitionInActionBelief; + transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); + transitionsInBelief.push_back(transitionInActionBelief); + } + mdpTransitions.push_back(transitionsInBelief); } } expansionTimer.stop(); @@ -512,6 +553,31 @@ namespace storm { STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) + storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); + mdpLabeling.addLabel("init"); + mdpLabeling.addLabel("target"); + mdpLabeling.addLabelToState("init", 0); + for (auto targetState : targetStates) { + mdpLabeling.addLabelToState("target", targetState); + } + + storm::storage::sparse::ModelComponents 
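With rewards, the value iteration's backup seeds each action's value with the immediate reward before adding the expected value of the triangulated successor beliefs:

    Q(b, a) = r(b, a) + sum over observations z of P(z | b, a) * sum_j lambda_j * V(v_j)

where the v_j are the grid vertices of the successor belief after observing z. As a compact sketch (illustrative types, double for ValueType):

    #include <cstdint>
    #include <map>
    #include <utility>
    #include <vector>

    // One backup for a fixed belief and action. For probability objectives the
    // actionReward argument is simply zero.
    double bellmanBackup(double actionReward,
                         std::map<uint32_t, double> const& observationProbabilities,
                         std::map<uint32_t, std::vector<std::pair<double, double>>> const& triangulated) {
        // triangulated: observation -> list of (lambda_j, V(vertex_j)) pairs
        double value = actionReward;
        for (auto const& obs : observationProbabilities) {
            double vertexSum = 0.0;
            for (auto const& weighted : triangulated.at(obs.first)) {
                vertexSum += weighted.first * weighted.second;
            }
            value += obs.second * vertexSum;
        }
        return value;
    }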
modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); + storm::models::sparse::Mdp overApproxMdp(modelComponents); + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); + } + } + overApproxMdp.addRewardModel("std", mdpRewardModel); + overApproxMdp.restrictRewardModels(std::set({"std"})); + overApproxMdp.printModelInformationToStream(std::cout); + storm::utility::Stopwatch overApproxTimer(true); // Value Iteration while (!finished && iteration < maxIterations) { @@ -600,13 +666,12 @@ namespace storm { auto overApprox = storm::utility::zero(); for (size_t j = 0; j < initLambdas.size(); ++j) { if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; + overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])]; } } overApproxTimer.stop(); - + ValueType underApprox = storm::utility::zero(); + /* storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, chosenActions, gridResolution, initialBelief.id, min, true); @@ -615,13 +680,26 @@ namespace storm { STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl << "Time Underapproximation: " << underApproxTimer - << std::endl); + << std::endl);*/ STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); + auto model = std::make_shared>(overApproxMdp); + auto modelPtr = std::static_pointer_cast>(model); + std::vector parameterNames; + storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames); + + std::string propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? 
[F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); + + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); + STORM_LOG_ASSERT(res, "Result not exist."); + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); + STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); + + return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); } From 4664b4244b9c50edba4b1b8f68404c883696b2ba Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 26 Nov 2019 11:01:22 +0100 Subject: [PATCH 4/9] Refactoring of on-the-fly computation to reduce code duplication --- .../ApproximatePOMDPModelchecker.cpp | 410 +++--------------- .../ApproximatePOMDPModelchecker.h | 15 + 2 files changed, 73 insertions(+), 352 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index bbb87684e..1a2a24a38 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -22,6 +22,7 @@ namespace storm { precision = 0.000000001; cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(precision), false); useMdp = false; + maxIterations = 1000; } template @@ -47,327 +48,15 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution, + bool computeRewards) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - uint64_t maxIterations = 100; - bool finished = false; - uint64_t iteration = 0; - std::vector> beliefList; - std::vector beliefIsTarget; - std::vector> beliefGrid; - std::map result; - std::map result_backup; - //Use caching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; - std::map> lambdaCache; - std::map> chosenActions; - - - std::deque beliefsToBeExpanded; - - // Belief ID -> Observation -> Probability - std::map>> observationProbabilities; - // current ID -> action -> next ID - std::map>> nextBelieves; - - uint64_t nextId = 0; - storm::utility::Stopwatch expansionTimer(true); - // Initial belief always has ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); - ++nextId; - beliefList.push_back(initialBelief); - beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - - // These are the components to build the MDP from the grid - std::map beliefStateMap; - std::vector>> mdpTransitions; - std::vector targetStates; - uint64_t mdpStateId = 0; - - beliefStateMap[initialBelief.id] = mdpStateId; - ++mdpStateId; - - // for the initial belief, add the triangulated initial states - std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); - std::vector> initSubSimplex = initTemp.first; - std::vector initLambdas = initTemp.second; - subSimplexCache[0] = initSubSimplex; - lambdaCache[0] = initLambdas; - bool initInserted = false; - - std::vector> 
initTransitionsInBelief; - std::map initTransitionInActionBelief; - - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { - uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]); - if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) { - if (searchResult == 0) { - // the initial belief is on the grid itself - initInserted = true; - beliefGrid.push_back(initialBelief); - beliefsToBeExpanded.push_back(0); - } else { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; - beliefList.push_back(gridBelief); - beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - beliefsToBeExpanded.push_back(nextId); - - beliefStateMap[nextId] = mdpStateId; - initTransitionInActionBelief[mdpStateId] = initLambdas[j]; - ++nextId; - ++mdpStateId; - } - } - } - } - - // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs - if (!initTransitionInActionBelief.empty()) { - initTransitionsInBelief.push_back(initTransitionInActionBelief); - mdpTransitions.push_back(initTransitionsInBelief); - } - - //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting - - // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - bool isTarget = beliefIsTarget[currId]; - if (isTarget) { - result.emplace(std::make_pair(currId, storm::utility::one())); - result_backup.emplace(std::make_pair(currId, storm::utility::one())); - - // MDP stuff - std::vector> transitionsInBelief; - targetStates.push_back(beliefStateMap[currId]); - std::map transitionInActionBelief; - transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); - transitionsInBelief.push_back(transitionInActionBelief); - mdpTransitions.push_back(transitionsInBelief); - } else { - result.emplace(std::make_pair(currId, storm::utility::zero())); - result_backup.emplace(std::make_pair(currId, storm::utility::zero())); - - uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front()); - std::vector> observationProbabilitiesInAction(numChoices); - std::vector> nextBelievesInAction(numChoices); - std::vector> transitionsInBelief; - - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); - std::map actionObservationBelieves; - std::map transitionInActionBelief; - - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, - observation, nextId); - nextId = beliefList.size(); - actionObservationBelieves[observation] = idNextBelief; - //Triangulate here and put the possibly resulting 
belief in the grid - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(idNextBelief) > 0) { - // TODO is this necessary here? Think later - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - beliefList[idNextBelief].probabilities, gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; - } - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - if (getBeliefIdInVector(beliefGrid, observation, subSimplex[j]) == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; - beliefList.push_back(gridBelief); - beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - beliefsToBeExpanded.push_back(nextId); - - beliefStateMap[nextId] = mdpStateId; - transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; - ++nextId; - ++mdpStateId; - } else { - transitionInActionBelief[beliefStateMap[getBeliefIdInVector(beliefGrid, observation, subSimplex[j])]] = iter->second * lambdas[j]; - } - } - } - } - observationProbabilitiesInAction[action] = actionObservationProbabilities; - nextBelievesInAction[action] = actionObservationBelieves; - if (!transitionInActionBelief.empty()) { - transitionsInBelief.push_back(transitionInActionBelief); - } - } - observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); - if (transitionsInBelief.empty()) { - std::map transitionInActionBelief; - transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); - transitionsInBelief.push_back(transitionInActionBelief); - } - mdpTransitions.push_back(transitionsInBelief); - } - } - expansionTimer.stop(); - STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl) - STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) - STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) - - storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); - mdpLabeling.addLabel("init"); - mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", 0); - for (auto targetState : targetStates) { - mdpLabeling.addLabelToState("target", targetState); - } - - storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); - storm::models::sparse::Mdp overApproxMdp(modelComponents); - overApproxMdp.printModelInformationToStream(std::cout); - - storm::utility::Stopwatch overApproxTimer(true); - // Value Iteration - while (!finished && iteration < maxIterations) { - storm::utility::Stopwatch iterationTimer(true); - STORM_LOG_DEBUG("Iteration " << iteration + 1); - bool improvement = false; - for (size_t i = 0; i < beliefGrid.size(); ++i) { - storm::pomdp::Belief currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (!isTarget) { - // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); - // Initialize the values for the value iteration - ValueType chosenValue = min 
? storm::utility::infinity() : -storm::utility::infinity(); - std::vector chosenActionIndices; - ValueType currentValue; - - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = storm::utility::zero(); - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - // cache the values to not always re-calculate - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(nextBelief.id) > 0) { - subSimplex = subSimplexCache[nextBelief.id]; - lambdas = lambdaCache[nextBelief.id]; - } else { - // TODO is this necessary here? Everything should have already been computed - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; - } - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); - } - } - currentValue += iter->second * sum; - } - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { - chosenActionIndices.clear(); - } - chosenActionIndices.push_back(action); - } - } - result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndices; - // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { - improvement = true; - } - } - } - finished = !improvement; - // back up - result_backup = result; - - ++iteration; - iterationTimer.stop(); - STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); - } - - STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - - auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; - } + if (computeRewards) { + RewardModelType const &pomdpRewardModel = pomdp.getUniqueRewardModel(); } - overApproxTimer.stop(); - - ValueType underApprox = storm::utility::zero(); - /*storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, false); - underApproxTimer.stop();*/ - - // STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl << "Time Underapproximation: " << underApproxTimer << std::endl); - - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - //STORM_PRINT("Under-Approximation Result: " << 
underApprox << std::endl); - - auto model = std::make_shared>(overApproxMdp); - auto modelPtr = std::static_pointer_cast>(model); - std::vector parameterNames; - storm::api::exportSparseModelAsDrn(modelPtr, "test", parameterNames); - - std::string propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); - STORM_LOG_ASSERT(res, "Result not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); - - return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); - - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { - STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - - RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); - uint64_t maxIterations = 100; bool finished = false; uint64_t iteration = 0; std::vector> beliefList; @@ -407,8 +96,7 @@ namespace storm { ++mdpStateId; // for the initial belief, add the triangulated initial states - std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); + std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); std::vector> initSubSimplex = initTemp.first; std::vector initLambdas = initTemp.second; subSimplexCache[0] = initSubSimplex; @@ -432,8 +120,7 @@ namespace storm { storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); ++nextId; @@ -461,9 +148,11 @@ namespace storm { beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; - result.emplace(std::make_pair(currId, storm::utility::zero())); - result_backup.emplace(std::make_pair(currId, storm::utility::zero())); if (isTarget) { + // Depending on whether we compute rewards, we select the right initial result + result.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero() : storm::utility::one())); + result_backup.emplace(std::make_pair(currId, computeRewards ? 
storm::utility::zero() : storm::utility::one())); + // MDP stuff std::vector> transitionsInBelief; targetStates.push_back(beliefStateMap[currId]); @@ -472,6 +161,9 @@ namespace storm { transitionsInBelief.push_back(transitionInActionBelief); mdpTransitions.push_back(transitionsInBelief); } else { + result.emplace(std::make_pair(currId, storm::utility::zero())); + result_backup.emplace(std::make_pair(currId, storm::utility::zero())); + uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); std::vector> observationProbabilitiesInAction(numChoices); @@ -530,15 +222,20 @@ namespace storm { } observationProbabilitiesInAction[action] = actionObservationProbabilities; nextBelievesInAction[action] = actionObservationBelieves; - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - beliefList[currId]); + if (computeRewards) { + actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + beliefList[currId]); + } if (!transitionInActionBelief.empty()) { transitionsInBelief.push_back(transitionInActionBelief); } } observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); - beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); + if (computeRewards) { + beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); + } + if (transitionsInBelief.empty()) { std::map transitionInActionBelief; @@ -563,19 +260,21 @@ namespace storm { storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); storm::models::sparse::Mdp overApproxMdp(modelComponents); - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); + if (computeRewards) { + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); + } } + overApproxMdp.addRewardModel("std", mdpRewardModel); + overApproxMdp.restrictRewardModels(std::set({"std"})); } - overApproxMdp.addRewardModel("std", mdpRewardModel); - 
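Note how the unified routine seeds target beliefs per objective: reaching a target has probability one but accumulates no further reward, hence the computeRewards ? zero : one pattern above. As a one-line helper (hypothetical name, double for ValueType):

    // Fixed-point value of a target belief.
    double initialValueForTarget(bool computeRewards) {
        return computeRewards ? 0.0 : 1.0;
    }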
overApproxMdp.restrictRewardModels(std::set({"std"})); overApproxMdp.printModelInformationToStream(std::cout); storm::utility::Stopwatch overApproxTimer(true); @@ -589,16 +288,14 @@ namespace storm { bool isTarget = beliefIsTarget[currentBelief.id]; if (!isTarget) { // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); + uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front()); // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); + ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = beliefActionRewards[currentBelief.id][action]; + currentValue = computeRewards ? beliefActionRewards[currentBelief.id][action] : storm::utility::zero(); for (auto iter = observationProbabilities[currentBelief.id][action].begin(); iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { uint32_t observation = iter->first; @@ -622,8 +319,7 @@ namespace storm { auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); } } @@ -631,8 +327,7 @@ namespace storm { } // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { @@ -646,8 +341,7 @@ namespace storm { chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || - cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { + if (!cc.isEqual(result_backup[currentBelief.id], result[currentBelief.id])) { improvement = true; } } @@ -674,7 +368,7 @@ namespace storm { /* storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, true); + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); underApproxTimer.stop(); STORM_PRINT("Time Overapproximation: " << overApproxTimer @@ -690,7 +384,9 @@ namespace storm { std::vector parameterNames; storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames); - std::string propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; + std::string propertyString = computeRewards ? "R" : "P"; + propertyString += min ? "min" : "max"; + propertyString += "=? 
[F \"target\"]"; std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); @@ -700,16 +396,27 @@ namespace storm { STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); + } + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { + return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, true); } + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { + return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, false); + } template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, uint64_t gridResolution) { storm::utility::Stopwatch beliefGridTimer(true); - uint64_t maxIterations = 100; bool finished = false; uint64_t iteration = 0; @@ -915,7 +622,6 @@ namespace storm { ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, uint64_t gridResolution) { storm::utility::Stopwatch beliefGridTimer(true); - uint64_t maxIterations = 100; bool finished = false; uint64_t iteration = 0; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index f5e7ff805..78c2b5536 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -45,6 +45,20 @@ namespace storm { uint64_t gridResolution); private: + /** + * + * @param pomdp + * @param targetObservations + * @param min + * @param gridResolution + * @param computeRewards + * @return + */ + std::unique_ptr> + computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, + uint64_t gridResolution, bool computeRewards); + /** * TODO * @param pomdp @@ -225,6 +239,7 @@ namespace storm { storm::utility::ConstantsComparator cc; double precision; bool useMdp; + uint64_t maxIterations; }; } From 8f81958268781e4ee9f6c93312eaba52dd69fca2 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Tue, 26 Nov 2019 12:33:20 +0100 Subject: [PATCH 5/9] Refactoring of reachability reward and probability methods to reduce code duplication --- .../ApproximatePOMDPModelchecker.cpp | 310 +++--------------- .../ApproximatePOMDPModelchecker.h | 14 + 2 files changed, 66 insertions(+), 258 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 1a2a24a38..8d43e3977 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -414,12 +414,17 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + ApproximatePOMDPModelchecker::computeReachability(storm::models::sparse::Pomdp 
const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution, + bool computeRewards) { storm::utility::Stopwatch beliefGridTimer(true); bool finished = false; uint64_t iteration = 0; + if (computeRewards) { + RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); + } + std::vector> beliefList; std::vector beliefIsTarget; uint64_t nextId = 0; @@ -427,13 +432,10 @@ namespace storm { storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); ++nextId; beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); std::vector> beliefGrid; - constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, - nextId); + constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId); nextId = beliefList.size(); beliefGridTimer.stop(); @@ -448,24 +450,28 @@ namespace storm { std::map>> observationProbabilities; // current ID -> action -> next ID std::map>> nextBelieves; + // current ID -> action -> reward + std::map> beliefActionRewards; storm::utility::Stopwatch nextBeliefGeneration(true); for (size_t i = 0; i < beliefGrid.size(); ++i) { auto currentBelief = beliefGrid[i]; bool isTarget = beliefIsTarget[currentBelief.id]; if (isTarget) { - result.emplace(std::make_pair(currentBelief.id, storm::utility::one())); - result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::one())); + result.emplace(std::make_pair(currentBelief.id, computeRewards ? storm::utility::zero() : storm::utility::one())); + result_backup.emplace(std::make_pair(currentBelief.id, computeRewards ? 
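// Target beliefs are fixed points of the value iteration: expected additional
// reward zero when computing rewards, reachability probability one otherwise.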
storm::utility::zero() : storm::utility::one())); } else { result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - //TODO put this in extra function - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); + // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative + uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); std::vector> observationProbabilitiesInAction(numChoices); std::vector> nextBelievesInAction(numChoices); + std::vector actionRewardsInState(numChoices); + for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action); std::map actionObservationBelieves; @@ -473,23 +479,24 @@ namespace storm { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - currentBelief, - action, - observation, - nextId); + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, currentBelief, + action, observation, nextId); nextId = beliefList.size(); } observationProbabilitiesInAction[action] = actionObservationProbabilities; nextBelievesInAction[action] = actionObservationBelieves; + if (computeRewards) { + actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief); + } } - observationProbabilities.emplace( - std::make_pair(currentBelief.id, observationProbabilitiesInAction)); + observationProbabilities.emplace(std::make_pair(currentBelief.id, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); + if (computeRewards) { + beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState)); + } } + } nextBeliefGeneration.stop(); @@ -508,16 +515,14 @@ namespace storm { bool isTarget = beliefIsTarget[currentBelief.id]; if (!isTarget) { // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); + uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front()); // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); + ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = storm::utility::zero(); + currentValue = computeRewards ? 
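// Bellman backup for one action: start from the immediate action reward (zero in
// the probability case) and add the observation-weighted, interpolated successor
// values accumulated below.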
beliefActionRewards[currentBelief.id][action] : storm::utility::zero(); for (auto iter = observationProbabilities[currentBelief.id][action].begin(); iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { uint32_t observation = iter->first; @@ -530,8 +535,8 @@ namespace storm { subSimplex = subSimplexCache[nextBelief.id]; lambdas = lambdaCache[nextBelief.id]; } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(nextBelief.probabilities, + gridResolution); subSimplex = temp.first; lambdas = temp.second; subSimplexCache[nextBelief.id] = subSimplex; @@ -540,16 +545,15 @@ namespace storm { auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); } } + currentValue += iter->second * sum; } // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { @@ -561,7 +565,7 @@ namespace storm { result[currentBelief.id] = chosenValue; chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { + if (!cc.isEqual(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id])) { improvement = true; } } @@ -580,17 +584,14 @@ namespace storm { beliefGrid.push_back(initialBelief); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); std::vector> initSubSimplex = temp.first; std::vector initLambdas = temp.second; auto overApprox = storm::utility::zero(); for (size_t j = 0; j < initLambdas.size(); ++j) { if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; + overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])]; } } overApproxTimer.stop(); @@ -598,9 +599,9 @@ namespace storm { // Now onto the under-approximation storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = useMdp ? 
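// The under-approximation unrolls the reachable belief space restricted to the
// actions selected by the over-approximation: as an MDP that keeps all optimal
// actions, or as a DTMC that fixes a single chosen action per belief.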
computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, false) : - computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, - chosenActions, gridResolution, initialBelief.id, min, false); + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards) : + computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); underApproxTimer.stop(); STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl @@ -608,231 +609,24 @@ namespace storm { << std::endl << "Time Underapproximation: " << underApproxTimer << std::endl); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); + return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); + } + + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { + return computeReachability(pomdp, targetObservations, min, gridResolution, false); } - //TODO This function reuses a lot of code from the probability computation, refactor to minimize code duplication! template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, uint64_t gridResolution) { - storm::utility::Stopwatch beliefGridTimer(true); - bool finished = false; - uint64_t iteration = 0; - - RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); - - std::vector> beliefList; - std::vector beliefIsTarget; - uint64_t nextId = 0; - // Initial belief always has ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); - ++nextId; - beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - - - std::vector> beliefGrid; - constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, - nextId); - nextId = beliefList.size(); - beliefGridTimer.stop(); - - storm::utility::Stopwatch overApproxTimer(true); - // Belief ID -> Value - std::map result; - std::map result_backup; - // Belief ID -> ActionIndex - std::map> chosenActions; - - // Belief ID -> Observation -> Probability - std::map>> observationProbabilities; - // current ID -> action -> next ID - std::map>> nextBelieves; - // current ID -> action -> reward - std::map> beliefActionRewards; - - storm::utility::Stopwatch nextBeliefGeneration(true); - for (size_t i = 0; i < beliefGrid.size(); ++i) { - auto currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - if (!isTarget) { - //TODO put this in extra function - // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative - uint64_t representativeState = 
pomdp.getStatesWithObservation(currentBelief.observation).front(); - uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); - std::vector> observationProbabilitiesInAction(numChoices); - std::vector> nextBelievesInAction(numChoices); - - std::vector actionRewardsInState(numChoices); - - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, currentBelief, action); - std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - currentBelief, - action, - observation, - nextId); - nextId = beliefList.size(); - } - observationProbabilitiesInAction[action] = actionObservationProbabilities; - nextBelievesInAction[action] = actionObservationBelieves; - - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief); - } - observationProbabilities.emplace( - std::make_pair(currentBelief.id, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); - beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState)); - } - - } - nextBeliefGeneration.stop(); - - //Use chaching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; - std::map> lambdaCache; - - STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) - // Value Iteration - while (!finished && iteration < maxIterations) { - storm::utility::Stopwatch iterationTimer(true); - STORM_LOG_DEBUG("Iteration " << iteration + 1); - bool improvement = false; - for (size_t i = 0; i < beliefGrid.size(); ++i) { - storm::pomdp::Belief currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (!isTarget) { - // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); - // Initialize the values for the value iteration - ValueType chosenValue = min ? 
storm::utility::infinity() - : -storm::utility::infinity(); - std::vector chosenActionIndices; - ValueType currentValue; - - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = beliefActionRewards[currentBelief.id][action]; - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - // cache the values to not always re-calculate - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(nextBelief.id) > 0) { - subSimplex = subSimplexCache[nextBelief.id]; - lambdas = lambdaCache[nextBelief.id]; - } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; - } - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); - } - } - - currentValue += iter->second * sum; - } - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { - chosenActionIndices.clear(); - } - chosenActionIndices.push_back(action); - } - } - - result[currentBelief.id] = chosenValue; - - chosenActions[currentBelief.id] = chosenActionIndices; - // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || - cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { - improvement = true; - } - } - } - finished = !improvement; - // back up - result_backup = result; - - ++iteration; - iterationTimer.stop(); - STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); - } - - STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - - beliefGrid.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); - std::vector> initSubSimplex = temp.first; - std::vector initLambdas = temp.second; - - auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; - } - } - overApproxTimer.stop(); - - // Now onto the under-approximation - storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = useMdp ? 
computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, true) : - computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, true); - underApproxTimer.stop(); - - STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl - << "Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); + return computeReachability(pomdp, targetObservations, min, gridResolution, true); } template diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 78c2b5536..cdf19dcd5 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -59,6 +59,20 @@ namespace storm { std::set targetObservations, bool min, uint64_t gridResolution, bool computeRewards); + /** + * + * @param pomdp + * @param targetObservations + * @param min + * @param gridResolution + * @param computeRewards + * @return + */ + std::unique_ptr> + computeReachability(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, + uint64_t gridResolution, bool computeRewards); + /** * TODO * @param pomdp From b7b213571d7ad28393743c4e3c05a770687781e7 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 29 Nov 2019 13:27:49 +0100 Subject: [PATCH 6/9] Refactoring of underapproximation procedures to reduce code duplication --- .../ApproximatePOMDPModelchecker.cpp | 247 +++++------------- .../ApproximatePOMDPModelchecker.h | 28 +- 2 files changed, 75 insertions(+), 200 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 8d43e3977..c377461d8 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -598,10 +598,8 @@ namespace storm { // Now onto the under-approximation storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = useMdp ? 
computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards) : - computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); + ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp); underApproxTimer.stop(); STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl @@ -631,119 +629,16 @@ namespace storm { template ValueType - ApproximatePOMDPModelchecker::computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, - bool computeReward) { - std::set visitedBelieves; - std::deque believesToBeExpanded; - std::map beliefStateMap; - std::vector> transitions; - std::vector targetStates; - - uint64_t stateId = 0; - beliefStateMap[initialBeliefId] = stateId; - ++stateId; - - // Expand the believes - visitedBelieves.insert(initialBeliefId); - believesToBeExpanded.push_back(initialBeliefId); - while (!believesToBeExpanded.empty()) { - auto currentBeliefId = believesToBeExpanded.front(); - std::map transitionsInState; - STORM_LOG_DEBUG("Exploring Belief " << beliefList[currentBeliefId].observation << "||" - << beliefList[currentBeliefId].probabilities); - if (beliefIsTarget[currentBeliefId]) { - // add a self-loop to target states and save them - transitionsInState[beliefStateMap[currentBeliefId]] = storm::utility::one(); - targetStates.push_back(beliefStateMap[currentBeliefId]); - } else { - if (chosenActions.find(currentBeliefId) == chosenActions.end()) { - // If the current Belief is not part of the grid, we have not computed the action to choose yet - chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations, - observationProbabilities, - nextBelieves, result, gridResolution, - currentBeliefId, beliefList.size(), min); - } - for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].begin(); - iter != observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].end(); ++iter) { - uint32_t observation = iter->first; - uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId][0]][observation]; - if (visitedBelieves.insert(nextBeliefId).second) { - beliefStateMap[nextBeliefId] = stateId; - ++stateId; - believesToBeExpanded.push_back(nextBeliefId); - } - transitionsInState[beliefStateMap[nextBeliefId]] = iter->second; - } - } - transitions.push_back(transitionsInState); - believesToBeExpanded.pop_front(); - } - - storm::models::sparse::StateLabeling labeling(transitions.size()); - labeling.addLabel("init"); - labeling.addLabel("target"); - labeling.addLabelToState("init", 0); - for (auto targetState : targetStates) { - labeling.addLabelToState("target", targetState); - } - - storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); - if (computeReward) { - for (auto const &iter : 
beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - // Add the reward collected by taking the chosen Action in the belief - rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( - storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])), - currentBelief)); - } - } - - std::unordered_map rewardModels = {{"std", rewardModel}}; - - storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(transitions), labeling, rewardModels); - - storm::models::sparse::Dtmc underApproxDtmc(modelComponents); - auto model = std::make_shared>(underApproxDtmc); - model->printModelInformationToStream(std::cout); - - std::string propertyString; - if (computeReward) { - propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; - } else { - propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; - } - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties( - propertyVector).front(); - - std::unique_ptr res( - storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, - true))); - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - return res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; - } - - template - ValueType - ApproximatePOMDPModelchecker::computeUnderapproximationWithMDP(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, - bool computeRewards) { + ApproximatePOMDPModelchecker::computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + std::map> chosenActions, + uint64_t gridResolution, uint64_t initialBeliefId, bool min, + bool computeRewards, bool generateMdp) { std::set visitedBelieves; std::deque believesToBeExpanded; std::map beliefStateMap; @@ -768,40 +663,21 @@ namespace storm { targetStates.push_back(beliefStateMap[currentBeliefId]); actionTransitionStorage.push_back(transitionsInStateWithAction); } else { - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); if (chosenActions.find(currentBeliefId) == chosenActions.end()) { - // If the current Belief is not part of the grid, the next states have not been computed yet. 
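// Beliefs first reached during the unrolling were never touched by the value
// iteration, so their action choice is extracted on demand: a one-step lookahead
// against the over-approximation values, keeping all optimal actions when an MDP
// is generated and a single action for the DTMC case.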
- std::vector> observationProbabilitiesInAction; - std::vector> nextBelievesInAction; - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, beliefList[currentBeliefId], action); - std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - beliefList[currentBeliefId], - action, - observation, - beliefList.size()); - } - observationProbabilitiesInAction.push_back(actionObservationProbabilities); - nextBelievesInAction.push_back(actionObservationBelieves); - } - observationProbabilities.emplace(std::make_pair(currentBeliefId, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction)); + chosenActions[currentBeliefId] = generateMdp ? extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, + observationProbabilities, + nextBelieves, result, gridResolution, + currentBeliefId, beliefList.size(), min) : + extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations, + observationProbabilities, + nextBelieves, result, gridResolution, + currentBeliefId, beliefList.size(), min); } // Iterate over all actions and add the corresponding transitions - for (uint64_t action = 0; action < numChoices; ++action) { + for (auto const &action : chosenActions[currentBeliefId]) { std::map transitionsInStateWithAction; - for (auto iter = observationProbabilities[currentBeliefId][action].begin(); - iter != observationProbabilities[currentBeliefId][action].end(); ++iter) { + for (auto iter = observationProbabilities[currentBeliefId][action].begin(); iter != observationProbabilities[currentBeliefId][action].end(); ++iter) { uint32_t observation = iter->first; uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation]; if (visitedBelieves.insert(nextBeliefId).second) { @@ -826,28 +702,48 @@ namespace storm { labeling.addLabelToState("target", targetState); } - storm::storage::sparse::ModelComponents modelComponents( - buildTransitionMatrix(transitions), labeling); - - storm::models::sparse::Mdp underApproxMdp(modelComponents); + std::shared_ptr> model; + auto transitionMatrix = buildTransitionMatrix(transitions); + if (transitionMatrix.getRowCount() == transitionMatrix.getRowGroupCount()) { + transitionMatrix.makeRowGroupingTrivial(); + } + storm::storage::sparse::ModelComponents modelComponents(transitionMatrix, labeling); + if (transitionMatrix.hasTrivialRowGrouping()) { + + storm::models::sparse::Dtmc underApproxMc(modelComponents); + storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); + if (computeRewards) { + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + // Add the reward collected by taking the chosen Action in the belief + rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( + storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])), + currentBelief)); + } + } + underApproxMc.addRewardModel("std", rewardModel); + underApproxMc.restrictRewardModels(std::set({"std"})); - if (computeRewards) { - storm::models::sparse::StandardRewardModel 
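// boost::none: no state rewards are used; the vector provides one state-action
// reward slot per transition matrix row, i.e. per choice of the MDP.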
rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); + model = std::make_shared>(underApproxMc); + } else { + storm::models::sparse::Mdp underApproxMdp(modelComponents); + if (computeRewards) { + storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); + } } + underApproxMdp.addRewardModel("std", rewardModel); + underApproxMdp.restrictRewardModels(std::set({"std"})); } - underApproxMdp.addRewardModel("std", rewardModel); - underApproxMdp.restrictRewardModels(std::set({"std"})); + model = std::make_shared>(underApproxMdp); } - - auto model = std::make_shared>(underApproxMdp); model->printModelInformationToStream(std::cout); std::string propertyString; @@ -857,12 +753,9 @@ namespace storm { propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? 
[F \"target\"]"; } std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties( - propertyVector).front(); + std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - std::unique_ptr res( - storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, - true))); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); return res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; @@ -938,17 +831,10 @@ namespace storm { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( pomdp, currentBelief, action); std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - currentBelief, - action, - observation, - nextId); + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, currentBelief, + action, observation, nextId); nextId = beliefList.size(); } observationProbabilitiesInAction.push_back(actionObservationProbabilities); @@ -958,8 +844,7 @@ namespace storm { nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction)); // choose the action which results in the value computed by the over-approximation - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); + ValueType chosenValue = min ? 
storm::utility::infinity() : -storm::utility::infinity(); std::vector chosenActionIndices; ValueType currentValue; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index cdf19dcd5..2c289d1f5 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -134,25 +134,15 @@ namespace storm { * @param min * @return */ - ValueType computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward); - - ValueType computeUnderapproximationWithMDP(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards); + ValueType computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + std::map> chosenActions, + uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward, bool generateMdp); /** * From 877c15ed43bbabe17846bead9f89f531970c4513 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 29 Nov 2019 13:41:36 +0100 Subject: [PATCH 7/9] Removed obsolete function to create transition matrices from a data structure not used anymore --- .../ApproximatePOMDPModelchecker.cpp | 19 ------------------- .../ApproximatePOMDPModelchecker.h | 10 +++++----- 2 files changed, 5 insertions(+), 24 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index c377461d8..90fe6dc3b 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -791,25 +791,6 @@ namespace storm { return smb.build(); } - template - storm::storage::SparseMatrix - ApproximatePOMDPModelchecker::buildTransitionMatrix( - std::vector> transitions) { - uint_fast64_t currentRow = 0; - uint64_t nrEntries = 0; - for (auto const &map : transitions) { - nrEntries += map.size(); - } - storm::storage::SparseMatrixBuilder smb(transitions.size(), transitions.size(), nrEntries); - for (auto const &map : transitions) { - for (auto const &transition : map) { - smb.addNextValue(currentRow, transition.first, transition.second); - } - ++currentRow; - } - return smb.build(); - } - template std::vector ApproximatePOMDPModelchecker::extractBestActions( storm::models::sparse::Pomdp const &pomdp, diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 2c289d1f5..89d1be286 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -232,11 +232,11 @@ namespace storm { uint64_t getBeliefIdInVector(std::vector> const &grid, uint32_t observation, std::vector probabilities); - 
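// For illustration, a sketch of how the surviving overload can turn the nested
// structure state -> action -> (successor -> probability) into a sparse matrix
// with one row group per state. This is not the literal implementation:
// totalRows, numStates and totalEntries are placeholders for counts gathered in
// a first pass, and the flags follow Storm's custom-row-grouping constructor.
storm::storage::SparseMatrixBuilder<ValueType> builder(totalRows, numStates, totalEntries, false, true, transitions.size());
uint_fast64_t row = 0;
for (auto const &actionMaps : transitions) {
    // all actions of one state form a row group
    builder.newRowGroup(row);
    for (auto const &map : actionMaps) {
        // one matrix row per action, one entry per successor
        for (auto const &entry : map) {
            builder.addNextValue(row, entry.first, entry.second);
        }
        ++row;
    }
}
// builder.build() then yields the transition matrix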
storm::storage::SparseMatrix - buildTransitionMatrix(std::vector> transitions); - - storm::storage::SparseMatrix - buildTransitionMatrix(std::vector>> transitions); + /** + * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability) + * @return sparseMatrix representing the transitions + */ + storm::storage::SparseMatrix buildTransitionMatrix(std::vector>> transitions); ValueType getRewardAfterAction(storm::models::sparse::Pomdp const &pomdp, uint64_t action, storm::pomdp::Belief belief); From fe81e0d7cf6cd536fc4066381a7a76dd1540220c Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 29 Nov 2019 13:59:43 +0100 Subject: [PATCH 8/9] Smaller touch-ups (Removal of unused code, pass-by-reference) --- .../ApproximatePOMDPModelchecker.cpp | 40 ++++++++----------- .../ApproximatePOMDPModelchecker.h | 36 ++++++++--------- 2 files changed, 35 insertions(+), 41 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 90fe6dc3b..282f4dd68 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -49,14 +49,10 @@ namespace storm { template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution, + std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - if (computeRewards) { - RewardModelType const &pomdpRewardModel = pomdp.getUniqueRewardModel(); - } - bool finished = false; uint64_t iteration = 0; std::vector> beliefList; @@ -364,7 +360,7 @@ namespace storm { } } overApproxTimer.stop(); - ValueType underApprox = storm::utility::zero(); + auto underApprox = storm::utility::zero(); /* storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, @@ -401,30 +397,28 @@ namespace storm { template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + std::set const &targetObservations, bool min, + uint64_t gridResolution) { return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, true); } template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + std::set const &targetObservations, bool min, + uint64_t gridResolution) { return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, false); } template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachability(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution, + std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards) { storm::utility::Stopwatch beliefGridTimer(true); bool finished = false; uint64_t iteration = 0; - if (computeRewards) { - RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); - } - std::vector> beliefList; std::vector beliefIsTarget; uint64_t nextId = 0; @@ -616,14 +610,15 @@ namespace storm 
{ template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + std::set const &targetObservations, bool min, + uint64_t gridResolution) { return computeReachability(pomdp, targetObservations, min, gridResolution, false); } template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + std::set const &targetObservations, bool min, uint64_t gridResolution) { return computeReachability(pomdp, targetObservations, min, gridResolution, true); } @@ -632,7 +627,7 @@ namespace storm { ApproximatePOMDPModelchecker::computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &targetObservations, + std::set const &targetObservations, std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, @@ -796,7 +791,7 @@ namespace storm { storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &targetObservations, + std::set const &targetObservations, std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, @@ -870,7 +865,7 @@ namespace storm { storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &targetObservations, + std::set const &targetObservations, std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, @@ -925,7 +920,7 @@ namespace storm { template void ApproximatePOMDPModelchecker::constructBeliefGrid( storm::models::sparse::Pomdp const &pomdp, - std::set target_observations, uint64_t gridResolution, + std::set const &target_observations, uint64_t gridResolution, std::vector> &beliefList, std::vector> &grid, std::vector &beliefIsKnown, uint64_t nextId) { @@ -971,9 +966,7 @@ namespace storm { storm::utility::convertNumber(gridResolution); storm::pomdp::Belief belief = {newId, observation, distribution}; - STORM_LOG_TRACE( - "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) - << ")," << distribution << "]"); + STORM_LOG_TRACE("Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << distribution << "]"); beliefList.push_back(belief); grid.push_back(belief); beliefIsKnown.push_back(isTarget); @@ -1107,7 +1100,8 @@ namespace storm { template uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, - std::vector &beliefIsTarget, std::set &targetObservations, storm::pomdp::Belief belief, uint64_t actionIndex, uint32_t observation, + std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief belief, uint64_t actionIndex, + uint32_t observation, uint64_t id) { storm::utility::Stopwatch distrWatch(true); std::vector distributionAfter(pomdp.getNumberOfStates()); //, storm::utility::zero()); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 89d1be286..f20f4ebc1 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -27,21 +27,21 @@ namespace storm { std::unique_ptr> computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, - 
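// Taking the observation set by const reference avoids copying a potentially
// large std::set on every call; the same signature change is applied to all
// reachability entry points and helpers in this patch.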
std::set targetObservations, bool min, + std::set const &targetObservations, bool min, uint64_t gridResolution); std::unique_ptr> - computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, + computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, uint64_t gridResolution); std::unique_ptr> computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, + std::set const &targetObservations, bool min, uint64_t gridResolution); std::unique_ptr> computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, + std::set const &targetObservations, bool min, uint64_t gridResolution); private: @@ -56,7 +56,7 @@ namespace storm { */ std::unique_ptr> computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, + std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards); /** @@ -70,7 +70,7 @@ namespace storm { */ std::unique_ptr> computeReachability(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, + std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards); /** @@ -89,7 +89,7 @@ namespace storm { std::vector extractBestActions(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &target_observations, + std::set const &target_observations, std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, @@ -110,14 +110,14 @@ namespace storm { * @return */ std::vector extractBestAction(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &target_observations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, - bool min); + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set const &target_observations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, + bool min); /** * TODO @@ -137,7 +137,7 @@ namespace storm { ValueType computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &targetObservations, + std::set const &targetObservations, std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, @@ -172,7 +172,7 @@ namespace storm { * */ void constructBeliefGrid(storm::models::sparse::Pomdp const &pomdp, - std::set target_observations, uint64_t gridResolution, + std::set const &target_observations, uint64_t gridResolution, std::vector> &beliefList, std::vector> &grid, std::vector &beliefIsKnown, uint64_t nextId); @@ -206,7 +206,7 @@ namespace storm { storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &targetObservations, + std::set const &targetObservations, storm::pomdp::Belief belief, uint64_t actionIndex, uint32_t observation, uint64_t id); From 8992b70da38147c0d3d1d66a8c4f69a680babe2d Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 29 Nov 2019 15:08:16 +0100 Subject: [PATCH 9/9] Made Value Iteration its own function to reduce duplicate code --- .../ApproximatePOMDPModelchecker.cpp | 214 ++++++------------ 
.../ApproximatePOMDPModelchecker.h | 11 + 2 files changed, 83 insertions(+), 142 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 282f4dd68..37a29f9b1 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -52,14 +52,10 @@ namespace storm { std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - - bool finished = false; - uint64_t iteration = 0; std::vector> beliefList; std::vector beliefIsTarget; std::vector> beliefGrid; std::map result; - std::map result_backup; //Use caching to avoid multiple computation of the subsimplices and lambdas std::map>> subSimplexCache; std::map> lambdaCache; @@ -147,7 +143,6 @@ namespace storm { if (isTarget) { // Depending on whether we compute rewards, we select the right initial result result.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero() : storm::utility::one())); - result_backup.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero() : storm::utility::one())); // MDP stuff std::vector> transitionsInBelief; @@ -158,7 +153,6 @@ namespace storm { mdpTransitions.push_back(transitionsInBelief); } else { result.emplace(std::make_pair(currId, storm::utility::zero())); - result_backup.emplace(std::make_pair(currId, storm::utility::zero())); uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); @@ -274,6 +268,61 @@ namespace storm { overApproxMdp.printModelInformationToStream(std::cout); storm::utility::Stopwatch overApproxTimer(true); + auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, + subSimplexCache, lambdaCache, + result, chosenActions, gridResolution, min, computeRewards); + overApproxTimer.stop(); + auto underApprox = storm::utility::zero(); + /* + storm::utility::Stopwatch underApproxTimer(true); + ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); + underApproxTimer.stop(); + + STORM_PRINT("Time Overapproximation: " << overApproxTimer + << std::endl + << "Time Underapproximation: " << underApproxTimer + << std::endl);*/ + + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + + auto model = std::make_shared>(overApproxMdp); + auto modelPtr = std::static_pointer_cast>(model); + std::vector parameterNames; + storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames); + + std::string propertyString = computeRewards ? "R" : "P"; + propertyString += min ? "min" : "max"; + propertyString += "=? 
[F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); + + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); + STORM_LOG_ASSERT(res, "Result not exist."); + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); + STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); + + return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); + } + + template + ValueType + ApproximatePOMDPModelchecker::overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector> &beliefGrid, + std::vector &beliefIsTarget, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map> &beliefActionRewards, + std::map>> &subSimplexCache, + std::map> &lambdaCache, + std::map &result, + std::map> &chosenActions, + uint64_t gridResolution, bool min, bool computeRewards) { + std::map result_backup = result; + uint64_t iteration = 0; + bool finished = false; // Value Iteration while (!finished && iteration < maxIterations) { storm::utility::Stopwatch iterationTimer(true); @@ -304,9 +353,8 @@ namespace storm { subSimplex = subSimplexCache[nextBelief.id]; lambdas = lambdaCache[nextBelief.id]; } else { - //TODO This should not ne reachable - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(nextBelief.probabilities, + gridResolution); subSimplex = temp.first; lambdas = temp.second; subSimplexCache[nextBelief.id] = subSimplex; @@ -353,45 +401,14 @@ namespace storm { STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); + auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])]; + for (size_t j = 0; j < lambdaCache[0].size(); ++j) { + if (lambdaCache[0][j] != storm::utility::zero()) { + overApprox += lambdaCache[0][j] * result_backup[getBeliefIdInVector(beliefGrid, beliefList[0].observation, subSimplexCache[0][j])]; } } - overApproxTimer.stop(); - auto underApprox = storm::utility::zero(); - /* - storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); - underApproxTimer.stop(); - - STORM_PRINT("Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl);*/ - - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - - auto model = std::make_shared>(overApproxMdp); - auto modelPtr = std::static_pointer_cast>(model); - std::vector parameterNames; - storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames); - - std::string propertyString = computeRewards ? "R" : "P"; - propertyString += min ? "min" : "max"; - propertyString += "=? 
[F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); - STORM_LOG_ASSERT(res, "Result not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl); - - return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); + return overApprox; } template @@ -416,8 +433,6 @@ namespace storm { std::set const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards) { storm::utility::Stopwatch beliefGridTimer(true); - bool finished = false; - uint64_t iteration = 0; std::vector> beliefList; std::vector beliefIsTarget; @@ -436,7 +451,6 @@ namespace storm { storm::utility::Stopwatch overApproxTimer(true); // Belief ID -> Value std::map result; - std::map result_backup; // Belief ID -> ActionIndex std::map> chosenActions; @@ -446,6 +460,13 @@ namespace storm { std::map>> nextBelieves; // current ID -> action -> reward std::map> beliefActionRewards; + //Use caching to avoid multiple computation of the subsimplices and lambdas + std::map>> subSimplexCache; + std::map> lambdaCache; + + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); + subSimplexCache[0] = temp.first; + lambdaCache[0] = temp.second; storm::utility::Stopwatch nextBeliefGeneration(true); for (size_t i = 0; i < beliefGrid.size(); ++i) { @@ -453,10 +474,8 @@ namespace storm { bool isTarget = beliefIsTarget[currentBelief.id]; if (isTarget) { result.emplace(std::make_pair(currentBelief.id, computeRewards ? storm::utility::zero() : storm::utility::one())); - result_backup.emplace(std::make_pair(currentBelief.id, computeRewards ? storm::utility::zero() : storm::utility::one())); } else { result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); //TODO put this in extra function // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); @@ -494,100 +513,11 @@ namespace storm { } nextBeliefGeneration.stop(); - //Use chaching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; - std::map> lambdaCache; - STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) // Value Iteration - while (!finished && iteration < maxIterations) { - storm::utility::Stopwatch iterationTimer(true); - STORM_LOG_DEBUG("Iteration " << iteration + 1); - bool improvement = false; - for (size_t i = 0; i < beliefGrid.size(); ++i) { - storm::pomdp::Belief currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (!isTarget) { - // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front()); - // Initialize the values for the value iteration - ValueType chosenValue = min ? 
storm::utility::infinity() : -storm::utility::infinity(); - std::vector chosenActionIndices; - ValueType currentValue; - - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = computeRewards ? beliefActionRewards[currentBelief.id][action] : storm::utility::zero(); - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - // cache the values to not always re-calculate - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(nextBelief.id) > 0) { - subSimplex = subSimplexCache[nextBelief.id]; - lambdas = lambdaCache[nextBelief.id]; - } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas(nextBelief.probabilities, - gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; - } - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); - } - } - - currentValue += iter->second * sum; - } - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { - chosenActionIndices.clear(); - } - chosenActionIndices.push_back(action); - } - } - result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndices; - // Check if the iteration brought an improvement - if (!cc.isEqual(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id])) { - improvement = true; - } - } - } - finished = !improvement; - // back up - result_backup = result; - - ++iteration; - iterationTimer.stop(); - STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); - } - - STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - - beliefGrid.push_back(initialBelief); - beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - - std::pair>, std::vector> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); - std::vector> initSubSimplex = temp.first; - std::vector initLambdas = temp.second; - - auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])]; - } - } + auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, + subSimplexCache, lambdaCache, + result, chosenActions, gridResolution, min, computeRewards); overApproxTimer.stop(); // Now onto the under-approximation diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h 
index f20f4ebc1..6d8b50d43 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -240,6 +240,17 @@ namespace storm {
                 ValueType getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t action, storm::pomdp::Belief<ValueType> belief);

+                ValueType
+                overApproximationValueIteration(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                                                std::vector<storm::pomdp::Belief<ValueType>> &beliefGrid, std::vector<bool> &beliefIsTarget,
+                                                std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+                                                std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+                                                std::map<uint64_t, std::vector<ValueType>> &beliefActionRewards,
+                                                std::map<uint64_t, std::vector<std::vector<ValueType>>> &subSimplexCache,
+                                                std::map<uint64_t, std::vector<ValueType>> &lambdaCache, std::map<uint64_t, ValueType> &result,
+                                                std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
+                                                uint64_t gridResolution, bool min, bool computeRewards);
+
                 storm::utility::ConstantsComparator<ValueType> cc;

                 double precision;
                 bool useMdp;
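
The final step of overApproximationValueIteration is a barycentric interpolation over Lovejoy's belief grid: the value of an arbitrary belief is the lambda-weighted sum of the values at the vertices of the grid sub-simplex that contains it. Because the patch now seeds subSimplexCache[0] and lambdaCache[0] with the initial belief's decomposition, the over-approximation can be read off directly from the cache instead of re-inserting initialBelief into the grid, as the deleted code did. A minimal self-contained sketch of that step follows; the names interpolate and gridValue are illustrative assumptions, not part of storm:

    // Sketch only: V(b) ~= sum_j lambda_j * V(v_j), where the v_j are the
    // vertices of the grid sub-simplex containing belief b and the lambda_j
    // are the barycentric weights (non-negative, summing to one) returned by
    // a decomposition such as computeSubSimplexAndLambdas.
    #include <cstddef>
    #include <functional>
    #include <vector>

    using Value = double;
    using Belief = std::vector<Value>;  // probability distribution over states

    Value interpolate(std::vector<Value> const &lambdas,
                      std::vector<Belief> const &subSimplex,
                      std::function<Value(Belief const &)> const &gridValue) {
        Value result = 0.0;
        for (std::size_t j = 0; j < lambdas.size(); ++j) {
            if (lambdas[j] != 0.0) {  // skip zero-weight vertices, as the patch does
                result += lambdas[j] * gridValue(subSimplex[j]);
            }
        }
        return result;
    }

The same weighted sum appears inside the value-iteration loop, where the role of gridValue is played by result_backup; computing each belief's (subSimplex, lambdas) pair once and caching it is what keeps the per-iteration Bellman backup cheap.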