
Fix of wrong MDP underapproximation

Branch: tempestpy_adaptions
Alexander Bork, 5 years ago
Parent commit: bbd3ec7287
1. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (141 lines changed)
2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (31 lines changed)
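
The gist of the fix: the grid-based value iteration and the best-action extraction previously remembered only a single optimal action per belief (chosenActionIndex), so the under-approximation built as an MDP silently dropped all but one optimal choice. After this commit, chosenActions maps each belief to a std::vector<uint64_t> holding every action whose value ties with the optimum; the comparator precision and the useMdp switch become class members (the local "bool useMdp = true;" overrides are removed), and state rewards are only attached to the under-approximating model when a reward property is actually checked. The following stand-alone sketch illustrates the tie-keeping selection pattern for the useMdp case; the value function, tolerance, and names are made up for the example and are not Storm's API:

// Illustrative sketch only: keep *all* actions whose value ties with the best
// one within a small tolerance, instead of remembering a single index.
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

std::vector<uint64_t> bestActions(std::vector<double> const& actionValues, bool min, double precision) {
    double chosenValue = min ? std::numeric_limits<double>::infinity() : -std::numeric_limits<double>::infinity();
    std::vector<uint64_t> chosenActionIndices;
    for (uint64_t action = 0; action < actionValues.size(); ++action) {
        double currentValue = actionValues[action];
        bool tie = std::abs(currentValue - chosenValue) <= precision;
        bool strictlyBetter = min ? currentValue < chosenValue - precision : currentValue > chosenValue + precision;
        if (strictlyBetter || tie) {
            chosenValue = currentValue;
            if (!tie) {
                // A strictly better value invalidates the previously collected ties.
                chosenActionIndices.clear();
            }
            chosenActionIndices.push_back(action);
        }
    }
    return chosenActionIndices;
}

int main() {
    // Maximization: actions 1 and 3 tie for the optimum, so both are kept.
    for (uint64_t a : bestActions({0.2, 0.7, 0.5, 0.7}, /*min=*/false, 1e-9)) {
        std::cout << a << ' ';
    }
    std::cout << '\n';  // prints: 1 3
}

In the committed code the clear is additionally guarded by useMdp, so with useMdp disabled a tie still replaces the stored action and only a single index is kept, as before; extractBestActions always keeps ties.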

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (141 lines changed)

@@ -11,6 +11,7 @@
 #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
 #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
 #include "storm/api/properties.h"
+#include "storm/api/export.h"
 #include "storm-parsers/api/storm-parsers.h"
 namespace storm {
@@ -18,7 +19,9 @@ namespace storm {
 namespace modelchecker {
 template<typename ValueType, typename RewardModelType>
 ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker() {
-cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(0.00000000001), false);
+precision = 0.000000001;
+cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(precision), false);
+useMdp = false;
 }
 template<typename ValueType, typename RewardModelType>
@@ -59,7 +62,8 @@
 //Use caching to avoid multiple computation of the subsimplices and lambdas
 std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
 std::map<uint64_t, std::vector<ValueType>> lambdaCache;
-std::map<uint64_t, uint64_t> chosenActions;
+std::map<uint64_t, std::vector<uint64_t>> chosenActions;
 std::deque<uint64_t> beliefsToBeExpanded;
@@ -101,8 +105,7 @@
 storm::pomdp::Belief<ValueType> gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]};
 beliefList.push_back(gridBelief);
 beliefGrid.push_back(gridBelief);
-beliefIsTarget.push_back(
-targetObservations.find(initialBelief.observation) != targetObservations.end());
+beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
 beliefsToBeExpanded.push_back(nextId);
 ++nextId;
 }
@@ -125,14 +128,12 @@
 result.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
 result_backup.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
-uint64_t numChoices = pomdp.getNumberOfChoices(
-pomdp.getStatesWithObservation(beliefList[currId].observation).front());
+uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front());
 std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
 std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
 for (uint64_t action = 0; action < numChoices; ++action) {
-std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
-pomdp, beliefList[currId], action);
+std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action);
 std::map<uint32_t, uint64_t> actionObservationBelieves;
 for (auto iter = actionObservationProbabilities.begin();
 iter != actionObservationProbabilities.end(); ++iter) {
@@ -166,8 +167,7 @@
 storm::pomdp::Belief<ValueType> gridBelief = {nextId, observation, subSimplex[j]};
 beliefList.push_back(gridBelief);
 beliefGrid.push_back(gridBelief);
-beliefIsTarget.push_back(
-targetObservations.find(observation) != targetObservations.end());
+beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
 beliefsToBeExpanded.push_back(nextId);
 ++nextId;
 }
@@ -201,9 +201,8 @@
 uint64_t numChoices = pomdp.getNumberOfChoices(
 pomdp.getStatesWithObservation(currentBelief.observation).front());
 // Initialize the values for the value iteration
-ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
-: -storm::utility::infinity<ValueType>();
-uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+ValueType chosenValue = min ? storm::utility::infinity<ValueType>() : -storm::utility::infinity<ValueType>();
+std::vector<uint64_t> chosenActionIndices;
 ValueType currentValue;
 for (uint64_t action = 0; action < numChoices; ++action) {
@ -231,8 +230,7 @@ namespace storm {
auto sum = storm::utility::zero<ValueType>();
for (size_t j = 0; j < lambdas.size(); ++j) {
if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
sum += lambdas[j] * result_backup.at(
getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
}
}
currentValue += iter->second * sum;
@@ -243,11 +241,14 @@
 cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
 cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
 chosenValue = currentValue;
-chosenActionIndex = action;
+if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
+chosenActionIndices.clear();
+}
+chosenActionIndices.push_back(action);
 }
 }
 result[currentBelief.id] = chosenValue;
-chosenActions[currentBelief.id] = chosenActionIndex;
+chosenActions[currentBelief.id] = chosenActionIndices;
 // Check if the iteration brought an improvement
 if (cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
 improvement = true;
@@ -312,7 +313,7 @@
 //Use caching to avoid multiple computation of the subsimplices and lambdas
 std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
 std::map<uint64_t, std::vector<ValueType>> lambdaCache;
-std::map<uint64_t, uint64_t> chosenActions;
+std::map<uint64_t, std::vector<uint64_t>> chosenActions;
 std::deque<uint64_t> beliefsToBeExpanded;
@@ -459,7 +460,7 @@
 // Initialize the values for the value iteration
 ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
 : -storm::utility::infinity<ValueType>();
-uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+std::vector<uint64_t> chosenActionIndices;
 ValueType currentValue;
 for (uint64_t action = 0; action < numChoices; ++action) {
@@ -499,15 +500,17 @@
 (!min &&
 cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
 cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
 chosenValue = currentValue;
-chosenActionIndex = action;
+if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
+chosenActionIndices.clear();
+}
+chosenActionIndices.push_back(action);
 }
 }
 result[currentBelief.id] = chosenValue;
-chosenActions[currentBelief.id] = chosenActionIndex;
+chosenActions[currentBelief.id] = chosenActionIndices;
 // Check if the iteration brought an improvement
 if (cc.isLess(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id]) ||
 cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
@@ -586,7 +589,7 @@
 std::map<uint64_t, ValueType> result;
 std::map<uint64_t, ValueType> result_backup;
 // Belief ID -> ActionIndex
-std::map<uint64_t, uint64_t> chosenActions;
+std::map<uint64_t, std::vector<uint64_t>> chosenActions;
 // Belief ID -> Observation -> Probability
 std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
@@ -611,11 +614,9 @@
 std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
 for (uint64_t action = 0; action < numChoices; ++action) {
-std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
-pomdp, currentBelief, action);
+std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action);
 std::map<uint32_t, uint64_t> actionObservationBelieves;
-for (auto iter = actionObservationProbabilities.begin();
-iter != actionObservationProbabilities.end(); ++iter) {
+for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
 uint32_t observation = iter->first;
 // THIS CALL IS SLOW
 // TODO speed this up
@@ -659,7 +660,7 @@
 // Initialize the values for the value iteration
 ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
 : -storm::utility::infinity<ValueType>();
-uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+std::vector<uint64_t> chosenActionIndices;
 ValueType currentValue;
 for (uint64_t action = 0; action < numChoices; ++action) {
@@ -698,11 +699,14 @@
 cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
 cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
 chosenValue = currentValue;
-chosenActionIndex = action;
+if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
+chosenActionIndices.clear();
+}
+chosenActionIndices.push_back(action);
 }
 }
 result[currentBelief.id] = chosenValue;
-chosenActions[currentBelief.id] = chosenActionIndex;
+chosenActions[currentBelief.id] = chosenActionIndices;
 // Check if the iteration brought an improvement
 if (cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
 improvement = true;
@@ -721,8 +725,7 @@
 STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl);
 beliefGrid.push_back(initialBelief);
-beliefIsTarget.push_back(
-targetObservations.find(initialBelief.observation) != targetObservations.end());
+beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
 std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
 initialBelief.probabilities, gridResolution);
@@ -740,7 +743,6 @@
 overApproxTimer.stop();
 // Now onto the under-approximation
-bool useMdp = true;
 storm::utility::Stopwatch underApproxTimer(true);
 ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
 result, chosenActions, gridResolution, initialBelief.id, min, false) :
@@ -795,7 +797,7 @@
 std::map<uint64_t, ValueType> result;
 std::map<uint64_t, ValueType> result_backup;
 // Belief ID -> ActionIndex
-std::map<uint64_t, uint64_t> chosenActions;
+std::map<uint64_t, std::vector<uint64_t>> chosenActions;
 // Belief ID -> Observation -> Probability
 std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
@@ -874,7 +876,7 @@
 // Initialize the values for the value iteration
 ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
 : -storm::utility::infinity<ValueType>();
-uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+std::vector<uint64_t> chosenActionIndices;
 ValueType currentValue;
 for (uint64_t action = 0; action < numChoices; ++action) {
@@ -913,15 +915,17 @@
 (!min &&
 cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
 cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
 chosenValue = currentValue;
-chosenActionIndex = action;
+if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
+chosenActionIndices.clear();
+}
+chosenActionIndices.push_back(action);
 }
 }
 result[currentBelief.id] = chosenValue;
-chosenActions[currentBelief.id] = chosenActionIndex;
+chosenActions[currentBelief.id] = chosenActionIndices;
 // Check if the iteration brought an improvement
 if (cc.isLess(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id]) ||
 cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
@@ -960,7 +964,6 @@
 overApproxTimer.stop();
 // Now onto the under-approximation
-bool useMdp = true;
 storm::utility::Stopwatch underApproxTimer(true);
 ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
 result, chosenActions, gridResolution, initialBelief.id, min, true) :
@@ -989,7 +992,7 @@
 std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
 std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
 std::map<uint64_t, ValueType> &result,
-std::map<uint64_t, uint64_t> chosenActions,
+std::map<uint64_t, std::vector<uint64_t>> chosenActions,
 uint64_t gridResolution, uint64_t initialBeliefId, bool min,
 bool computeReward) {
 std::set<uint64_t> visitedBelieves;
@@ -1017,17 +1020,15 @@
 } else {
 if (chosenActions.find(currentBeliefId) == chosenActions.end()) {
 // If the current Belief is not part of the grid, we have not computed the action to choose yet
-chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget,
-targetObservations,
+chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations,
 observationProbabilities,
 nextBelieves, result, gridResolution,
 currentBeliefId, beliefList.size(), min);
 }
-for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].begin();
-iter !=
-observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].end(); ++iter) {
+for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].begin();
+iter != observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].end(); ++iter) {
 uint32_t observation = iter->first;
-uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId]][observation];
+uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId][0]][observation];
 if (visitedBelieves.insert(nextBeliefId).second) {
 beliefStateMap[nextBeliefId] = stateId;
 ++stateId;
@@ -1049,12 +1050,14 @@
 }
 storm::models::sparse::StandardRewardModel<ValueType> rewardModel(std::vector<ValueType>(beliefStateMap.size()));
-for (auto const &iter : beliefStateMap) {
-auto currentBelief = beliefList[iter.first];
-// Add the reward collected by taking the chosen Action in the belief
-rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex(
-storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first])),
-currentBelief));
+if (computeReward) {
+for (auto const &iter : beliefStateMap) {
+auto currentBelief = beliefList[iter.first];
+// Add the reward collected by taking the chosen Action in the belief
+rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex(
+storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])),
+currentBelief));
+}
 }
 std::unordered_map<std::string, RewardModelType> rewardModels = {{"std", rewardModel}};
@@ -1092,7 +1095,7 @@
 std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
 std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
 std::map<uint64_t, ValueType> &result,
-std::map<uint64_t, uint64_t> chosenActions,
+std::map<uint64_t, std::vector<uint64_t>> chosenActions,
 uint64_t gridResolution, uint64_t initialBeliefId, bool min,
 bool computeRewards) {
 std::set<uint64_t> visitedBelieves;
@@ -1152,8 +1155,7 @@
 std::map<uint64_t, ValueType> transitionsInStateWithAction;
 for (auto iter = observationProbabilities[currentBeliefId][action].begin();
-iter !=
-observationProbabilities[currentBeliefId][action].end(); ++iter) {
+iter != observationProbabilities[currentBeliefId][action].end(); ++iter) {
 uint32_t observation = iter->first;
 uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation];
 if (visitedBelieves.insert(nextBeliefId).second) {
@@ -1270,7 +1272,7 @@
 }
 template<typename ValueType, typename RewardModelType>
-uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::extractBestAction(
+std::vector<uint64_t> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::extractBestActions(
 storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
 std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
 std::vector<bool> &beliefIsTarget,
@@ -1312,7 +1314,7 @@
 // choose the action which results in the value computed by the over-approximation
 ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
 : -storm::utility::infinity<ValueType>();
-uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+std::vector<uint64_t> chosenActionIndices;
 ValueType currentValue;
 for (uint64_t action = 0; action < numChoices; ++action) {
@@ -1330,8 +1332,7 @@
 auto sum = storm::utility::zero<ValueType>();
 for (size_t j = 0; j < lambdas.size(); ++j) {
 if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
-sum += lambdas[j] * result.at(
-getBeliefIdInVector(beliefList, observation, subSimplex[j]));
+sum += lambdas[j] * result.at(getBeliefIdInVector(beliefList, observation, subSimplex[j]));
 }
 }
 currentValue += iter->second * sum;
@@ -1343,10 +1344,28 @@
 cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
 cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
 chosenValue = currentValue;
-chosenActionIndex = action;
+if (!cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
+chosenActionIndices.clear();
+}
+chosenActionIndices.push_back(action);
 }
 }
-return chosenActionIndex;
+return chosenActionIndices;
 }
+template<typename ValueType, typename RewardModelType>
+std::vector<uint64_t> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::extractBestAction(
+storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+std::vector<bool> &beliefIsTarget,
+std::set<uint32_t> &targetObservations,
+std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+std::map<uint64_t, ValueType> &result,
+uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) {
+return std::vector<uint64_t>{
+extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, gridResolution, currentBeliefId,
+nextId, min).front()};
+}

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (31 lines changed)

@@ -58,7 +58,30 @@
 * @param min
 * @return
 */
-uint64_t extractBestAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+std::vector<uint64_t> extractBestActions(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+std::vector<bool> &beliefIsTarget,
+std::set<uint32_t> &target_observations,
+std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+std::map<uint64_t, ValueType> &result,
+uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId,
+bool min);
+/**
+* TODO
+* @param pomdp
+* @param beliefList
+* @param observationProbabilities
+* @param nextBelieves
+* @param result
+* @param gridResolution
+* @param currentBeliefId
+* @param nextId
+* @param min
+* @return
+*/
+std::vector<uint64_t> extractBestAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
 std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
 std::vector<bool> &beliefIsTarget,
 std::set<uint32_t> &target_observations,
@@ -90,7 +113,7 @@
 std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
 std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
 std::map<uint64_t, ValueType> &result,
-std::map<uint64_t, uint64_t> chosenActions,
+std::map<uint64_t, std::vector<uint64_t>> chosenActions,
 uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward);
 ValueType computeUnderapproximationWithMDP(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -100,7 +123,7 @@
 std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
 std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
 std::map<uint64_t, ValueType> &result,
-std::map<uint64_t, uint64_t> chosenActions,
+std::map<uint64_t, std::vector<uint64_t>> chosenActions,
 uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards);
 /**
@@ -200,6 +223,8 @@
 ValueType getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t action, storm::pomdp::Belief<ValueType> belief);
 storm::utility::ConstantsComparator<ValueType> cc;
+double precision;
+bool useMdp;
 };
 }
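
A note on how the two under-approximation builders consume the new action vectors: the DTMC-style construction in computeUnderapproximation still follows a single representative action per belief, which is why the call sites above index chosenActions[...][0], while the MDP-style construction can in principle keep one nondeterministic choice per optimal action. A minimal sketch of that distinction, using simplified, hypothetical types rather than Storm's data structures:

// Simplified, hypothetical types (not Storm's API) to illustrate the idea.
#include <cstdint>
#include <map>
#include <vector>

using Distribution = std::map<uint64_t, double>;      // successor belief id -> probability
using ActionSuccessors = std::vector<Distribution>;   // one distribution per action of a belief

// DTMC-style under-approximation: follow one representative optimal action.
Distribution dtmcRow(ActionSuccessors const& successors, std::vector<uint64_t> const& chosenActions) {
    return successors[chosenActions[0]];
}

// MDP-style under-approximation: keep one nondeterministic choice per optimal action,
// so several equally good actions can be retained instead of an arbitrary one.
std::vector<Distribution> mdpRows(ActionSuccessors const& successors, std::vector<uint64_t> const& chosenActions) {
    std::vector<Distribution> rows;
    rows.reserve(chosenActions.size());
    for (uint64_t action : chosenActions) {
        rows.push_back(successors[action]);
    }
    return rows;
}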
