diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index a32d7dcc0..ff9bb522b 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -11,6 +11,7 @@
 #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
 #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
 #include "storm/api/properties.h"
+#include "storm/api/export.h"
 #include "storm-parsers/api/storm-parsers.h"
 
 namespace storm {
@@ -18,7 +19,9 @@ namespace storm {
     namespace modelchecker {
         template<typename ValueType, typename RewardModelType>
         ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker() {
-            cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(0.00000000001), false);
+            precision = 0.000000001;
+            cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(precision), false);
+            useMdp = false;
         }
 
         template<typename ValueType, typename RewardModelType>
@@ -59,7 +62,8 @@ namespace storm {
             //Use caching to avoid multiple computation of the subsimplices and lambdas
             std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
             std::map<uint64_t, std::vector<ValueType>> lambdaCache;
-            std::map<uint64_t, uint64_t> chosenActions;
+            std::map<uint64_t, std::vector<uint64_t>> chosenActions;
+
             std::deque<uint64_t> beliefsToBeExpanded;
@@ -101,8 +105,7 @@ namespace storm {
                     storm::pomdp::Belief<ValueType> gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]};
                     beliefList.push_back(gridBelief);
                     beliefGrid.push_back(gridBelief);
-                    beliefIsTarget.push_back(
-                            targetObservations.find(initialBelief.observation) != targetObservations.end());
+                    beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
                     beliefsToBeExpanded.push_back(nextId);
                     ++nextId;
                 }
@@ -125,14 +128,12 @@ namespace storm {
                 result.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
                 result_backup.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
 
-                uint64_t numChoices = pomdp.getNumberOfChoices(
-                        pomdp.getStatesWithObservation(beliefList[currId].observation).front());
+                uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front());
                 std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
                 std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
 
                 for (uint64_t action = 0; action < numChoices; ++action) {
-                    std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
-                            pomdp, beliefList[currId], action);
+                    std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action);
                     std::map<uint32_t, uint64_t> actionObservationBelieves;
                     for (auto iter = actionObservationProbabilities.begin();
                          iter != actionObservationProbabilities.end(); ++iter) {
@@ -166,8 +167,7 @@ namespace storm {
                                 storm::pomdp::Belief<ValueType> gridBelief = {nextId, observation, subSimplex[j]};
                                 beliefList.push_back(gridBelief);
                                 beliefGrid.push_back(gridBelief);
-                                beliefIsTarget.push_back(
-                                        targetObservations.find(observation) != targetObservations.end());
+                                beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
                                 beliefsToBeExpanded.push_back(nextId);
                                 ++nextId;
                             }
@@ -201,9 +201,8 @@ namespace storm {
                     uint64_t numChoices = pomdp.getNumberOfChoices(
                             pomdp.getStatesWithObservation(currentBelief.observation).front());
                     // Initialize the values for the value iteration
-                    ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
-                                                : -storm::utility::infinity<ValueType>();
-                    uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+                    ValueType chosenValue = min ? storm::utility::infinity<ValueType>() : -storm::utility::infinity<ValueType>();
+                    std::vector<uint64_t> chosenActionIndices;
                     ValueType currentValue;
 
                     for (uint64_t action = 0; action < numChoices; ++action) {
@@ -231,8 +230,7 @@ namespace storm {
                                 auto sum = storm::utility::zero<ValueType>();
                                 for (size_t j = 0; j < lambdas.size(); ++j) {
                                     if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
-                                        sum += lambdas[j] * result_backup.at(
-                                                getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
+                                        sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
                                     }
                                 }
                                 currentValue += iter->second * sum;
@@ -243,11 +241,14 @@ namespace storm {
                             cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
                             cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
                             chosenValue = currentValue;
-                            chosenActionIndex = action;
+                            if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
+                                chosenActionIndices.clear();
+                            }
+                            chosenActionIndices.push_back(action);
                         }
                     }
                     result[currentBelief.id] = chosenValue;
-                    chosenActions[currentBelief.id] = chosenActionIndex;
+                    chosenActions[currentBelief.id] = chosenActionIndices;
                     // Check if the iteration brought an improvement
                     if (cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
                         improvement = true;
@@ -312,7 +313,7 @@ namespace storm {
             //Use caching to avoid multiple computation of the subsimplices and lambdas
             std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
             std::map<uint64_t, std::vector<ValueType>> lambdaCache;
-            std::map<uint64_t, uint64_t> chosenActions;
+            std::map<uint64_t, std::vector<uint64_t>> chosenActions;
 
             std::deque<uint64_t> beliefsToBeExpanded;
@@ -459,7 +460,7 @@ namespace storm {
                     // Initialize the values for the value iteration
                     ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
                                                 : -storm::utility::infinity<ValueType>();
-                    uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+                    std::vector<uint64_t> chosenActionIndices;
                     ValueType currentValue;
 
                     for (uint64_t action = 0; action < numChoices; ++action) {
@@ -499,15 +500,17 @@ namespace storm {
                             (!min &&
                              cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
                             cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
-                            chosenValue = currentValue;
-                            chosenActionIndex = action;
+                            if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
+                                chosenActionIndices.clear();
+                            }
+                            chosenActionIndices.push_back(action);
                         }
                     }
                     result[currentBelief.id] = chosenValue;
-                    chosenActions[currentBelief.id] = chosenActionIndex;
+                    chosenActions[currentBelief.id] = chosenActionIndices;
                     // Check if the iteration brought an improvement
                     if (cc.isLess(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id]) ||
                         cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
@@ -586,7 +589,7 @@ namespace storm {
             std::map<uint64_t, ValueType> result;
             std::map<uint64_t, ValueType> result_backup;
             // Belief ID -> ActionIndex
-            std::map<uint64_t, uint64_t> chosenActions;
+            std::map<uint64_t, std::vector<uint64_t>> chosenActions;
 
             // Belief ID -> Observation -> Probability
             std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
@@ -611,11 +614,9 @@ namespace storm {
                 std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
 
                 for (uint64_t action = 0; action < numChoices; ++action) {
-                    std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
-                            pomdp, currentBelief, action);
+                    std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action);
                     std::map<uint32_t, uint64_t> actionObservationBelieves;
-                    for (auto iter = actionObservationProbabilities.begin();
-                         iter != actionObservationProbabilities.end(); ++iter) {
+                    for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
                         uint32_t observation = iter->first;
                         // THIS CALL IS SLOW
                         // TODO speed this up
@@ -659,7 +660,7 @@ namespace storm {
                     // Initialize the values for the value iteration
                    ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
                                                 : -storm::utility::infinity<ValueType>();
-                    uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+                    std::vector<uint64_t> chosenActionIndices;
                     ValueType currentValue;
 
                     for (uint64_t action = 0; action < numChoices; ++action) {
@@ -698,11 +699,14 @@ namespace storm {
                             cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
                             cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
                             chosenValue = currentValue;
-                            chosenActionIndex = action;
+                            if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
+                                chosenActionIndices.clear();
+                            }
+                            chosenActionIndices.push_back(action);
                         }
                     }
                     result[currentBelief.id] = chosenValue;
-                    chosenActions[currentBelief.id] = chosenActionIndex;
+                    chosenActions[currentBelief.id] = chosenActionIndices;
                     // Check if the iteration brought an improvement
                     if (cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
                         improvement = true;
@@ -721,8 +725,7 @@ namespace storm {
             STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl);
 
             beliefGrid.push_back(initialBelief);
-            beliefIsTarget.push_back(
-                    targetObservations.find(initialBelief.observation) != targetObservations.end());
+            beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
 
             std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
                     initialBelief.probabilities, gridResolution);
@@ -740,7 +743,6 @@ namespace storm {
             overApproxTimer.stop();
 
             // Now onto the under-approximation
-            bool useMdp = true;
             storm::utility::Stopwatch underApproxTimer(true);
             ValueType underApprox = useMdp ?
                                     computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
                                                                      result, chosenActions, gridResolution, initialBelief.id, min, false) :
@@ -795,7 +797,7 @@ namespace storm {
             std::map<uint64_t, ValueType> result;
             std::map<uint64_t, ValueType> result_backup;
             // Belief ID -> ActionIndex
-            std::map<uint64_t, uint64_t> chosenActions;
+            std::map<uint64_t, std::vector<uint64_t>> chosenActions;
 
             // Belief ID -> Observation -> Probability
             std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
@@ -874,7 +876,7 @@ namespace storm {
                     // Initialize the values for the value iteration
                     ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
                                                 : -storm::utility::infinity<ValueType>();
-                    uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+                    std::vector<uint64_t> chosenActionIndices;
                     ValueType currentValue;
 
                     for (uint64_t action = 0; action < numChoices; ++action) {
@@ -913,15 +915,17 @@ namespace storm {
                             (!min &&
                              cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
                             cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
-                            chosenValue = currentValue;
-                            chosenActionIndex = action;
+                            if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
+                                chosenActionIndices.clear();
+                            }
+                            chosenActionIndices.push_back(action);
                        }
                     }
                     result[currentBelief.id] = chosenValue;
-                    chosenActions[currentBelief.id] = chosenActionIndex;
+                    chosenActions[currentBelief.id] = chosenActionIndices;
                     // Check if the iteration brought an improvement
                     if (cc.isLess(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id]) ||
                         cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
@@ -960,7 +964,6 @@ namespace storm {
             overApproxTimer.stop();
 
             // Now onto the under-approximation
-            bool useMdp = true;
             storm::utility::Stopwatch underApproxTimer(true);
             ValueType underApprox = useMdp ?
                                     computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
                                                                      result, chosenActions, gridResolution, initialBelief.id, min, true) :
@@ -989,7 +992,7 @@ namespace storm {
                 std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
                 std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                 std::map<uint64_t, ValueType> &result,
-                std::map<uint64_t, uint64_t> chosenActions,
+                std::map<uint64_t, std::vector<uint64_t>> chosenActions,
                 uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward) {
             std::set<uint64_t> visitedBelieves;
@@ -1017,17 +1020,15 @@ namespace storm {
                 } else {
                     if (chosenActions.find(currentBeliefId) == chosenActions.end()) {
                         // If the current Belief is not part of the grid, we have not computed the action to choose yet
-                        chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget,
-                                                                           targetObservations,
+                        chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations,
                                                                            observationProbabilities, nextBelieves,
                                                                            result, gridResolution, currentBeliefId,
                                                                            beliefList.size(), min);
                     }
-                    for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].begin();
-                         iter !=
-                         observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].end(); ++iter) {
+                    for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].begin();
+                         iter != observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].end(); ++iter) {
                        uint32_t observation = iter->first;
-                        uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId]][observation];
+                        uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId][0]][observation];
                         if (visitedBelieves.insert(nextBeliefId).second) {
                             beliefStateMap[nextBeliefId] = stateId;
                             ++stateId;
@@ -1049,12 +1050,14 @@ namespace storm {
             }
 
             storm::models::sparse::StandardRewardModel<ValueType> rewardModel(std::vector<ValueType>(beliefStateMap.size()));
-            for (auto const &iter : beliefStateMap) {
-                auto currentBelief = beliefList[iter.first];
-                // Add the reward collected by taking the chosen Action in the belief
-                rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex(
-                        storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first])),
-                                                                             currentBelief));
+            if (computeReward) {
+                for (auto const &iter : beliefStateMap) {
+                    auto currentBelief = beliefList[iter.first];
+                    // Add the reward collected by taking the chosen Action in the belief
+                    rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex(
+                            storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])),
+                                                                                 currentBelief));
+                }
             }
 
             std::unordered_map<std::string, RewardModelType> rewardModels = {{"std", rewardModel}};
@@ -1092,7 +1095,7 @@ namespace storm {
                 std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
                 std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                 std::map<uint64_t, ValueType> &result,
-                std::map<uint64_t, uint64_t> chosenActions,
+                std::map<uint64_t, std::vector<uint64_t>> chosenActions,
                 uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards) {
             std::set<uint64_t> visitedBelieves;
@@ -1152,8 +1155,7 @@ namespace storm {
                         std::map<uint64_t, ValueType> transitionsInStateWithAction;
 
                         for (auto iter = observationProbabilities[currentBeliefId][action].begin();
-                             iter !=
-                             observationProbabilities[currentBeliefId][action].end(); ++iter) {
+                             iter != observationProbabilities[currentBeliefId][action].end(); ++iter) {
                             uint32_t observation = iter->first;
                             uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation];
                             if (visitedBelieves.insert(nextBeliefId).second) {
@@ -1270,7 +1272,7 @@ namespace storm {
         }
 
         template<typename ValueType, typename RewardModelType>
-        uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::extractBestAction(
+        std::vector<uint64_t> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::extractBestActions(
                 storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                 std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
                 std::vector<bool> &beliefIsTarget,
@@ -1312,7 +1314,7 @@ namespace storm {
             // choose the action which results in the value computed by the over-approximation
             ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
                                         : -storm::utility::infinity<ValueType>();
-            uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+            std::vector<uint64_t> chosenActionIndices;
             ValueType currentValue;
 
             for (uint64_t action = 0; action < numChoices; ++action) {
@@ -1330,8 +1332,7 @@ namespace storm {
                         auto sum = storm::utility::zero<ValueType>();
                         for (size_t j = 0; j < lambdas.size(); ++j) {
                             if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
-                                sum += lambdas[j] * result.at(
-                                        getBeliefIdInVector(beliefList, observation, subSimplex[j]));
+                                sum += lambdas[j] * result.at(getBeliefIdInVector(beliefList, observation, subSimplex[j]));
                            }
                         }
                         currentValue += iter->second * sum;
@@ -1343,10 +1344,28 @@ namespace storm {
                     cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
                     cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
                     chosenValue = currentValue;
-                    chosenActionIndex = action;
+                    if (!cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
+                        chosenActionIndices.clear();
+                    }
+                    chosenActionIndices.push_back(action);
                 }
             }
-            return chosenActionIndex;
+            return chosenActionIndices;
+        }
+
+        template<typename ValueType, typename RewardModelType>
+        std::vector<uint64_t> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::extractBestAction(
+                storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                std::vector<bool> &beliefIsTarget,
+                std::set<uint32_t> &targetObservations,
+                std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+                std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+                std::map<uint64_t, ValueType> &result,
+                uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) {
+            return std::vector<uint64_t>{
+                    extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, gridResolution, currentBeliefId,
+                                       nextId, min).front()};
         }
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 1222d0b1f..f5e7ff805 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -58,7 +58,30 @@ namespace storm {
             * @param min
             * @return
             */
-            uint64_t extractBestAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+            std::vector<uint64_t> extractBestActions(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                     std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                                                     std::vector<bool> &beliefIsTarget,
+                                                     std::set<uint32_t> &target_observations,
+                                                     std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+                                                     std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+                                                     std::map<uint64_t, ValueType> &result,
+                                                     uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId,
+                                                     bool min);
+
+            /**
+             * TODO
+             * @param pomdp
+             * @param beliefList
+             * @param observationProbabilities
+             * @param nextBelieves
+             * @param result
+             * @param gridResolution
+             * @param currentBeliefId
+             * @param nextId
+             * @param min
+             * @return
+             */
+            std::vector<uint64_t> extractBestAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                    std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
                                                    std::vector<bool> &beliefIsTarget,
                                                    std::set<uint32_t> &target_observations,
@@ -90,7 +113,7 @@ namespace storm {
                                                   std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
                                                   std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                                                   std::map<uint64_t, ValueType> &result,
-                                                  std::map<uint64_t, uint64_t> chosenActions,
+                                                  std::map<uint64_t, std::vector<uint64_t>> chosenActions,
                                                   uint64_t gridResolution, uint64_t initialBeliefId, bool min,
                                                   bool computeReward);

            ValueType computeUnderapproximationWithMDP(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -100,7 +123,7 @@ namespace storm {
                                                       std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
                                                       std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                                                       std::map<uint64_t, ValueType> &result,
-                                                      std::map<uint64_t, uint64_t> chosenActions,
+                                                      std::map<uint64_t, std::vector<uint64_t>> chosenActions,
                                                       uint64_t gridResolution, uint64_t initialBeliefId, bool min,
                                                       bool computeRewards);
@@ -200,6 +223,8 @@ namespace storm {
            ValueType getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t action,
                                           storm::pomdp::Belief<ValueType> belief);

            storm::utility::ConstantsComparator<ValueType> cc;
+            double precision;
+            bool useMdp;
        };
    }
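
Note (reviewer summary, not part of the patch): the change that recurs in every value-iteration loop above replaces the single chosenActionIndex by chosenActionIndices, a vector collecting every action whose value ties with the current optimum up to the comparator's precision. The new useMdp flag decides whether ties are kept, and the under-approximation either follows the first entry (the DTMC variant) or can branch over all of them (computeUnderapproximationWithMDP). Below is a minimal standalone sketch of that tie-collecting argmax pattern; the function name bestActions, the plain double values, and the fixed epsilon are illustrative assumptions, not Storm API.

#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

// Collect ALL actions whose value is optimal up to epsilon, mirroring how
// chosenActionIndices replaces chosenActionIndex in the patch: a strictly
// better value discards the previous candidates, an epsilon-tie is appended.
std::vector<uint64_t> bestActions(std::vector<double> const &actionValues, bool min, double epsilon = 1e-9) {
    double chosenValue = min ? std::numeric_limits<double>::infinity()
                             : -std::numeric_limits<double>::infinity();
    std::vector<uint64_t> chosenActionIndices;
    for (uint64_t action = 0; action < actionValues.size(); ++action) {
        double currentValue = actionValues[action];
        bool strictlyBetter = min ? currentValue < chosenValue - epsilon
                                  : currentValue > chosenValue + epsilon;
        bool tie = std::abs(currentValue - chosenValue) <= epsilon;
        if (strictlyBetter || tie) {
            chosenValue = currentValue;
            if (!tie) {
                chosenActionIndices.clear(); // strictly better: old ties are obsolete
            }
            chosenActionIndices.push_back(action);
        }
    }
    return chosenActionIndices;
}

int main() {
    // Actions 0 and 2 tie for the maximum, so both indices are printed.
    for (uint64_t a : bestActions({0.7, 0.3, 0.7}, /*min=*/false)) {
        std::cout << a << '\n';
    }
}

Clearing only on strict improvement is what distinguishes the new extractBestActions from the old extractBestAction; the wrapper the patch adds under that name simply takes .front() of the tie set, preserving the old single-action behaviour for the DTMC-based under-approximation.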