Fix of wrong MDP underapproximation

Branch: tempestpy_adaptions
Alexander Bork, 5 years ago, commit bbd3ec7287

Changed files:
  1. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (141 changed lines)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (31 changed lines)
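
Review summary: the grid-based over-approximation used to remember a single optimal action per belief (chosenActionIndex), so when several actions were tied for the optimum, the under-approximation could be built from an arbitrarily chosen one of them. This commit widens the chosenActions maps from std::map<uint64_t, uint64_t> to std::map<uint64_t, std::vector<uint64_t>>, collects all tied optimal actions during value iteration when the MDP-based under-approximation is in use, splits extractBestAction into extractBestActions (all optima) plus a single-action wrapper, guards reward-model construction behind computeReward, and turns the function-local bool useMdp into a class member.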

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (141 changed lines)
@@ -11,6 +11,7 @@
 #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
 #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
 #include "storm/api/properties.h"
+#include "storm/api/export.h"
 #include "storm-parsers/api/storm-parsers.h"

 namespace storm {
@@ -18,7 +19,9 @@ namespace storm {
     namespace modelchecker {
         template<typename ValueType, typename RewardModelType>
         ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker() {
-            cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(0.00000000001), false);
+            precision = 0.000000001;
+            cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(precision), false);
+            useMdp = false;
         }

         template<typename ValueType, typename RewardModelType>
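
Review note: the comparator tolerance moves into the new precision member (and loosens from 1e-11 to 1e-9 along the way), and useMdp becomes a member defaulting to false instead of a hard-coded local bool useMdp = true; at the two call sites further down, so the choice between the MDP- and DTMC-based under-approximation is now controlled in one place.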
@@ -59,7 +62,8 @@ namespace storm {
             //Use caching to avoid multiple computation of the subsimplices and lambdas
             std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
             std::map<uint64_t, std::vector<ValueType>> lambdaCache;
-            std::map<uint64_t, uint64_t> chosenActions;
+            std::map<uint64_t, std::vector<uint64_t>> chosenActions;

             std::deque<uint64_t> beliefsToBeExpanded;
@@ -101,8 +105,7 @@ namespace storm {
                 storm::pomdp::Belief<ValueType> gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]};
                 beliefList.push_back(gridBelief);
                 beliefGrid.push_back(gridBelief);
-                beliefIsTarget.push_back(
-                        targetObservations.find(initialBelief.observation) != targetObservations.end());
+                beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
                 beliefsToBeExpanded.push_back(nextId);
                 ++nextId;
             }
@@ -125,14 +128,12 @@ namespace storm {
                 result.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
                 result_backup.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
-                uint64_t numChoices = pomdp.getNumberOfChoices(
-                        pomdp.getStatesWithObservation(beliefList[currId].observation).front());
+                uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front());
                 std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
                 std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);

                 for (uint64_t action = 0; action < numChoices; ++action) {
-                    std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
-                            pomdp, beliefList[currId], action);
+                    std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action);
                     std::map<uint32_t, uint64_t> actionObservationBelieves;
                     for (auto iter = actionObservationProbabilities.begin();
                          iter != actionObservationProbabilities.end(); ++iter) {
@@ -166,8 +167,7 @@ namespace storm {
                     storm::pomdp::Belief<ValueType> gridBelief = {nextId, observation, subSimplex[j]};
                     beliefList.push_back(gridBelief);
                     beliefGrid.push_back(gridBelief);
-                    beliefIsTarget.push_back(
-                            targetObservations.find(observation) != targetObservations.end());
+                    beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
                     beliefsToBeExpanded.push_back(nextId);
                     ++nextId;
                 }
@@ -201,9 +201,8 @@ namespace storm {
                     uint64_t numChoices = pomdp.getNumberOfChoices(
                             pomdp.getStatesWithObservation(currentBelief.observation).front());
                     // Initialize the values for the value iteration
-                    ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
-                                                : -storm::utility::infinity<ValueType>();
-                    uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+                    ValueType chosenValue = min ? storm::utility::infinity<ValueType>() : -storm::utility::infinity<ValueType>();
+                    std::vector<uint64_t> chosenActionIndices;
                     ValueType currentValue;

                     for (uint64_t action = 0; action < numChoices; ++action) {
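
Review note on the sentinel removed here (and at the three analogous spots below): std::numeric_limits<uint64_t>::infinity() never produced an infinity, because integral types have has_infinity == false and infinity() then returns a value-initialized uint64_t. The "uninitialized" marker was therefore silently 0, a valid action index. A two-line standalone check (not part of the patch) makes this visible:

```cpp
#include <cstdint>
#include <limits>

// Integral types have no infinity: has_infinity is false and infinity()
// returns a value-initialized uint64_t, i.e. 0 -- a valid action index.
static_assert(!std::numeric_limits<uint64_t>::has_infinity, "no integral infinity");
static_assert(std::numeric_limits<uint64_t>::infinity() == 0, "the old sentinel was silently 0");
```

Switching to an initially empty std::vector<uint64_t> removes the need for a sentinel altogether.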
@@ -231,8 +230,7 @@ namespace storm {
                         auto sum = storm::utility::zero<ValueType>();
                         for (size_t j = 0; j < lambdas.size(); ++j) {
                             if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
-                                sum += lambdas[j] * result_backup.at(
-                                        getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
+                                sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
                             }
                         }
                         currentValue += iter->second * sum;
@@ -243,11 +241,14 @@ namespace storm {
                              cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
                             cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
                             chosenValue = currentValue;
-                            chosenActionIndex = action;
+                            if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
+                                chosenActionIndices.clear();
+                            }
+                            chosenActionIndices.push_back(action);
                         }
                     }
                     result[currentBelief.id] = chosenValue;
-                    chosenActions[currentBelief.id] = chosenActionIndex;
+                    chosenActions[currentBelief.id] = chosenActionIndices;
                     // Check if the iteration brought an improvement
                     if (cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
                         improvement = true;
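
This hunk carries the core of the fix: rather than overwriting a single chosenActionIndex (so that only the last optimal action survived), the iteration now keeps a vector of every action achieving the optimum when useMdp is set. One caveat worth flagging for a follow-up: the tie test cc.isEqual(zero, chosenValue - currentValue) runs after the assignment chosenValue = currentValue;, where the difference is always zero, so the guard effectively reduces to !useMdp and entries from earlier, strictly worse optima are never cleared in MDP mode. Below is a minimal standalone sketch of the presumably intended pattern, with plain doubles in place of ValueType, exact comparison in place of ConstantsComparator, and a hypothetical keepTies flag in the role of useMdp:

```cpp
#include <cstdint>
#include <limits>
#include <vector>

// Sketch: collect the indices of all optimal actions. The tie test runs
// *before* chosenValue is updated, so stale candidates are dropped whenever
// a strictly better action appears.
std::vector<uint64_t> collectOptimalActions(std::vector<double> const &actionValues,
                                            bool min, bool keepTies) {
    std::vector<uint64_t> chosen;
    double chosenValue = min ? std::numeric_limits<double>::infinity()
                             : -std::numeric_limits<double>::infinity();
    for (uint64_t action = 0; action < actionValues.size(); ++action) {
        double currentValue = actionValues[action];
        bool better = min ? currentValue < chosenValue : currentValue > chosenValue;
        bool tie = currentValue == chosenValue;
        if (better || tie) {
            if (!(keepTies && tie)) {
                chosen.clear();  // strictly better (or ties unwanted): restart the list
            }
            chosenValue = currentValue;
            chosen.push_back(action);
        }
    }
    return chosen;
}
```

With keepTies == false the vector always ends up holding exactly one index (the last optimal action), reproducing the old single-action behaviour that the DTMC-based under-approximation still consumes via chosenActions[...][0].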
@@ -312,7 +313,7 @@ namespace storm {
             //Use caching to avoid multiple computation of the subsimplices and lambdas
             std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
             std::map<uint64_t, std::vector<ValueType>> lambdaCache;
-            std::map<uint64_t, uint64_t> chosenActions;
+            std::map<uint64_t, std::vector<uint64_t>> chosenActions;

             std::deque<uint64_t> beliefsToBeExpanded;
@@ -459,7 +460,7 @@ namespace storm {
                     // Initialize the values for the value iteration
                     ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
                                                 : -storm::utility::infinity<ValueType>();
-                    uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+                    std::vector<uint64_t> chosenActionIndices;
                     ValueType currentValue;

                     for (uint64_t action = 0; action < numChoices; ++action) {
@@ -499,15 +500,17 @@ namespace storm {
                             (!min &&
                              cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
                             cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
                             chosenValue = currentValue;
-                            chosenActionIndex = action;
+                            if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
+                                chosenActionIndices.clear();
+                            }
+                            chosenActionIndices.push_back(action);
                         }
                     }
                     result[currentBelief.id] = chosenValue;
-                    chosenActions[currentBelief.id] = chosenActionIndex;
+                    chosenActions[currentBelief.id] = chosenActionIndices;
                     // Check if the iteration brought an improvement
                     if (cc.isLess(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id]) ||
                         cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
@@ -586,7 +589,7 @@ namespace storm {
             std::map<uint64_t, ValueType> result;
             std::map<uint64_t, ValueType> result_backup;
             // Belief ID -> ActionIndex
-            std::map<uint64_t, uint64_t> chosenActions;
+            std::map<uint64_t, std::vector<uint64_t>> chosenActions;

             // Belief ID -> Observation -> Probability
             std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
@@ -611,11 +614,9 @@ namespace storm {
                 std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);

                 for (uint64_t action = 0; action < numChoices; ++action) {
-                    std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
-                            pomdp, currentBelief, action);
+                    std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action);
                     std::map<uint32_t, uint64_t> actionObservationBelieves;
-                    for (auto iter = actionObservationProbabilities.begin();
-                         iter != actionObservationProbabilities.end(); ++iter) {
+                    for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
                         uint32_t observation = iter->first;
                         // THIS CALL IS SLOW
                         // TODO speed this up
@@ -659,7 +660,7 @@ namespace storm {
                     // Initialize the values for the value iteration
                     ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
                                                 : -storm::utility::infinity<ValueType>();
-                    uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+                    std::vector<uint64_t> chosenActionIndices;
                     ValueType currentValue;

                     for (uint64_t action = 0; action < numChoices; ++action) {
@@ -698,11 +699,14 @@ namespace storm {
                             cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
                             cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
                             chosenValue = currentValue;
-                            chosenActionIndex = action;
+                            if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
+                                chosenActionIndices.clear();
+                            }
+                            chosenActionIndices.push_back(action);
                         }
                     }
                     result[currentBelief.id] = chosenValue;
-                    chosenActions[currentBelief.id] = chosenActionIndex;
+                    chosenActions[currentBelief.id] = chosenActionIndices;
                     // Check if the iteration brought an improvement
                     if (cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
                         improvement = true;
@@ -721,8 +725,7 @@ namespace storm {
             STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl);

             beliefGrid.push_back(initialBelief);
-            beliefIsTarget.push_back(
-                    targetObservations.find(initialBelief.observation) != targetObservations.end());
+            beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());

             std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
                     initialBelief.probabilities, gridResolution);
@@ -740,7 +743,6 @@ namespace storm {
             overApproxTimer.stop();

             // Now onto the under-approximation
-            bool useMdp = true;
             storm::utility::Stopwatch underApproxTimer(true);
             ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
                                                                               result, chosenActions, gridResolution, initialBelief.id, min, false) :
@@ -795,7 +797,7 @@ namespace storm {
             std::map<uint64_t, ValueType> result;
             std::map<uint64_t, ValueType> result_backup;
             // Belief ID -> ActionIndex
-            std::map<uint64_t, uint64_t> chosenActions;
+            std::map<uint64_t, std::vector<uint64_t>> chosenActions;

             // Belief ID -> Observation -> Probability
             std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
@@ -874,7 +876,7 @@ namespace storm {
                     // Initialize the values for the value iteration
                     ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
                                                 : -storm::utility::infinity<ValueType>();
-                    uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+                    std::vector<uint64_t> chosenActionIndices;
                     ValueType currentValue;

                     for (uint64_t action = 0; action < numChoices; ++action) {
@@ -913,15 +915,17 @@ namespace storm {
                             (!min &&
                              cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
                             cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
                             chosenValue = currentValue;
-                            chosenActionIndex = action;
+                            if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
+                                chosenActionIndices.clear();
+                            }
+                            chosenActionIndices.push_back(action);
                         }
                     }
                     result[currentBelief.id] = chosenValue;
-                    chosenActions[currentBelief.id] = chosenActionIndex;
+                    chosenActions[currentBelief.id] = chosenActionIndices;
                     // Check if the iteration brought an improvement
                     if (cc.isLess(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id]) ||
                         cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
@@ -960,7 +964,6 @@ namespace storm {
             overApproxTimer.stop();

             // Now onto the under-approximation
-            bool useMdp = true;
             storm::utility::Stopwatch underApproxTimer(true);
             ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
                                                                               result, chosenActions, gridResolution, initialBelief.id, min, true) :
@@ -989,7 +992,7 @@ namespace storm {
                 std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
                 std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                 std::map<uint64_t, ValueType> &result,
-                std::map<uint64_t, uint64_t> chosenActions,
+                std::map<uint64_t, std::vector<uint64_t>> chosenActions,
                 uint64_t gridResolution, uint64_t initialBeliefId, bool min,
                 bool computeReward) {
             std::set<uint64_t> visitedBelieves;
@@ -1017,17 +1020,15 @@ namespace storm {
                 } else {
                     if (chosenActions.find(currentBeliefId) == chosenActions.end()) {
                         // If the current Belief is not part of the grid, we have not computed the action to choose yet
-                        chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget,
-                                                                           targetObservations,
+                        chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations,
                                                                            observationProbabilities,
                                                                            nextBelieves, result, gridResolution,
                                                                            currentBeliefId, beliefList.size(), min);
                     }
-                    for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].begin();
-                         iter !=
-                         observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].end(); ++iter) {
+                    for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].begin();
+                         iter != observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].end(); ++iter) {
                         uint32_t observation = iter->first;
-                        uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId]][observation];
+                        uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId][0]][observation];
                         if (visitedBelieves.insert(nextBeliefId).second) {
                             beliefStateMap[nextBeliefId] = stateId;
                             ++stateId;
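
Review note: the DTMC-based under-approximation still commits to a single policy; it follows the first collected optimum (chosenActions[currentBeliefId][0], matching the .front() used by the new extractBestAction wrapper further down). The MDP-based construction is the one that can retain all tied choices per belief state.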
@@ -1049,12 +1050,14 @@ namespace storm {
                 }
             }
             storm::models::sparse::StandardRewardModel<ValueType> rewardModel(std::vector<ValueType>(beliefStateMap.size()));
-            for (auto const &iter : beliefStateMap) {
-                auto currentBelief = beliefList[iter.first];
-                // Add the reward collected by taking the chosen Action in the belief
-                rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex(
-                        storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first])),
-                                                                             currentBelief));
+            if (computeReward) {
+                for (auto const &iter : beliefStateMap) {
+                    auto currentBelief = beliefList[iter.first];
+                    // Add the reward collected by taking the chosen Action in the belief
+                    rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex(
+                            storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])),
+                                                                                 currentBelief));
+                }
             }

             std::unordered_map<std::string, RewardModelType> rewardModels = {{"std", rewardModel}};
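
Review note: state rewards are now only filled in when computeReward is set; for purely probabilistic queries the all-zero "std" reward model is still attached, but no reward lookups happen. Inside the loop, chosenActions[iter.first][0] again picks the first optimal action; after the type change, passing the whole vector to StateActionPair would no longer compile.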
@@ -1092,7 +1095,7 @@ namespace storm {
                 std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
                 std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                 std::map<uint64_t, ValueType> &result,
-                std::map<uint64_t, uint64_t> chosenActions,
+                std::map<uint64_t, std::vector<uint64_t>> chosenActions,
                 uint64_t gridResolution, uint64_t initialBeliefId, bool min,
                 bool computeRewards) {
             std::set<uint64_t> visitedBelieves;
@@ -1152,8 +1155,7 @@ namespace storm {
                         std::map<uint64_t, ValueType> transitionsInStateWithAction;

                         for (auto iter = observationProbabilities[currentBeliefId][action].begin();
-                             iter !=
-                             observationProbabilities[currentBeliefId][action].end(); ++iter) {
+                             iter != observationProbabilities[currentBeliefId][action].end(); ++iter) {
                             uint32_t observation = iter->first;
                             uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation];
                             if (visitedBelieves.insert(nextBeliefId).second) {
@@ -1270,7 +1272,7 @@ namespace storm {
         }

         template<typename ValueType, typename RewardModelType>
-        uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::extractBestAction(
+        std::vector<uint64_t> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::extractBestActions(
                 storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                 std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
                 std::vector<bool> &beliefIsTarget,
@@ -1312,7 +1314,7 @@ namespace storm {
             // choose the action which results in the value computed by the over-approximation
             ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
                                         : -storm::utility::infinity<ValueType>();
-            uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+            std::vector<uint64_t> chosenActionIndices;
             ValueType currentValue;

             for (uint64_t action = 0; action < numChoices; ++action) {
@@ -1330,8 +1332,7 @@ namespace storm {
                     auto sum = storm::utility::zero<ValueType>();
                     for (size_t j = 0; j < lambdas.size(); ++j) {
                         if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
-                            sum += lambdas[j] * result.at(
-                                    getBeliefIdInVector(beliefList, observation, subSimplex[j]));
+                            sum += lambdas[j] * result.at(getBeliefIdInVector(beliefList, observation, subSimplex[j]));
                         }
                     }
                     currentValue += iter->second * sum;
@@ -1343,10 +1344,28 @@ namespace storm {
                     cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
                     cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
                     chosenValue = currentValue;
-                    chosenActionIndex = action;
+                    if (!cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
+                        chosenActionIndices.clear();
+                    }
+                    chosenActionIndices.push_back(action);
                 }
             }
-            return chosenActionIndex;
+            return chosenActionIndices;
+        }
+
+        template<typename ValueType, typename RewardModelType>
+        std::vector<uint64_t> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::extractBestAction(
+                storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                std::vector<bool> &beliefIsTarget,
+                std::set<uint32_t> &targetObservations,
+                std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+                std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+                std::map<uint64_t, ValueType> &result,
+                uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) {
+            return std::vector<uint64_t>{
+                    extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, gridResolution, currentBeliefId,
+                                       nextId, min).front()};
         }
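
extractBestAction survives as a thin compatibility wrapper: it delegates to extractBestActions and returns only the first optimum as a singleton vector, keeping single-action call sites such as the DTMC construction above unchanged. (The clear() guard in extractBestActions has the same after-the-assignment caveat noted earlier, here even without a useMdp conjunct, so it never fires; as written, the wrapper's .front() therefore appears to always yield the first enumerated action.) In terms of the earlier sketch, with the same hypothetical names:

```cpp
// Hypothetical single-action wrapper mirroring extractBestAction:
// compute all tied optima, then commit to the first one.
// (Reuses collectOptimalActions from the sketch above.)
std::vector<uint64_t> firstOptimalAction(std::vector<double> const &actionValues, bool min) {
    return {collectOptimalActions(actionValues, min, /*keepTies=*/true).front()};
}
```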

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (31 changed lines)
@@ -58,7 +58,30 @@ namespace storm {
              * @param min
              * @return
              */
-            uint64_t extractBestAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+            std::vector<uint64_t> extractBestActions(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                     std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                                                     std::vector<bool> &beliefIsTarget,
+                                                     std::set<uint32_t> &target_observations,
+                                                     std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+                                                     std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+                                                     std::map<uint64_t, ValueType> &result,
+                                                     uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId,
+                                                     bool min);
+
+            /**
+             * TODO
+             * @param pomdp
+             * @param beliefList
+             * @param observationProbabilities
+             * @param nextBelieves
+             * @param result
+             * @param gridResolution
+             * @param currentBeliefId
+             * @param nextId
+             * @param min
+             * @return
+             */
+            std::vector<uint64_t> extractBestAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                     std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
                                                     std::vector<bool> &beliefIsTarget,
                                                     std::set<uint32_t> &target_observations,
@@ -90,7 +113,7 @@ namespace storm {
                                               std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
                                               std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                                               std::map<uint64_t, ValueType> &result,
-                                              std::map<uint64_t, uint64_t> chosenActions,
+                                              std::map<uint64_t, std::vector<uint64_t>> chosenActions,
                                               uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward);

             ValueType computeUnderapproximationWithMDP(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -100,7 +123,7 @@ namespace storm {
                                                        std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
                                                        std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                                                        std::map<uint64_t, ValueType> &result,
-                                                       std::map<uint64_t, uint64_t> chosenActions,
+                                                       std::map<uint64_t, std::vector<uint64_t>> chosenActions,
                                                        uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards);

             /**
@@ -200,6 +223,8 @@ namespace storm {
             ValueType getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t action, storm::pomdp::Belief<ValueType> belief);

             storm::utility::ConstantsComparator<ValueType> cc;
+            double precision;
+            bool useMdp;
         };
     }
