Browse Source

Refactoring to include a list of all generated beliefs

tempestpy_adaptions
Alexander Bork 5 years ago
parent
commit
2bc79e6e07
  1. 208
      src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
  2. 35
      src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
  3. 12
      src/storm-pomdp/storage/Belief.h

208
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp

@ -20,17 +20,26 @@ namespace storm {
bool finished = false; bool finished = false;
uint64_t iteration = 0; uint64_t iteration = 0;
std::vector<storm::pomdp::Belief<ValueType>> beliefList;
uint64_t nextId = 0;
// Initial belief always has ID 0
storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(pomdp, nextId);
++nextId;
beliefList.push_back(initialBelief);
std::vector<Belief<ValueType>> beliefGrid;
std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
std::vector<bool> beliefIsKnown; std::vector<bool> beliefIsKnown;
constructBeliefGrid(pomdp, target_observations, gridResolution, beliefGrid, beliefIsKnown);
constructBeliefGrid(pomdp, target_observations, gridResolution, beliefList, beliefGrid, beliefIsKnown,
nextId);
nextId = beliefList.size();
std::map<uint64_t, ValueType> result; std::map<uint64_t, ValueType> result;
std::map<uint64_t, ValueType> result_backup; std::map<uint64_t, ValueType> result_backup;
std::map<uint64_t, uint64_t> chosenActions;
std::vector<std::vector<std::map<uint32_t, ValueType>>> observationProbabilities; std::vector<std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
std::vector<std::vector<std::map<uint32_t, Belief<ValueType>>>> nextBelieves;
std::vector<std::vector<std::map<uint32_t, uint64_t>>> nextBelieves;
uint64_t nextId = beliefGrid.size();
for (size_t i = 0; i < beliefGrid.size(); ++i) { for (size_t i = 0; i < beliefGrid.size(); ++i) {
auto currentBelief = beliefGrid[i]; auto currentBelief = beliefGrid[i];
bool isTarget = beliefIsKnown[i]; bool isTarget = beliefIsKnown[i];
@ -42,23 +51,24 @@ namespace storm {
result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>())); result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction; std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction;
std::vector<std::map<uint32_t, Belief<ValueType>>> nextBelievesInAction;
std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction;
uint64_t numChoices = pomdp.getNumberOfChoices( uint64_t numChoices = pomdp.getNumberOfChoices(
pomdp.getStatesWithObservation(currentBelief.observation).front()); pomdp.getStatesWithObservation(currentBelief.observation).front());
for (uint64_t action = 0; action < numChoices; ++action) { for (uint64_t action = 0; action < numChoices; ++action) {
std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction( std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
pomdp, currentBelief, action); pomdp, currentBelief, action);
std::map<uint32_t, Belief<ValueType>> actionObservationBelieves;
std::map<uint32_t, uint64_t> actionObservationBelieves;
for (auto iter = actionObservationProbabilities.begin(); for (auto iter = actionObservationProbabilities.begin();
iter != actionObservationProbabilities.end(); ++iter) { iter != actionObservationProbabilities.end(); ++iter) {
uint32_t observation = iter->first; uint32_t observation = iter->first;
actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp,
beliefList,
currentBelief, currentBelief,
action, action,
observation, observation,
nextId); nextId);
++nextId;
nextId = beliefList.size();
} }
observationProbabilitiesInAction.push_back(actionObservationProbabilities); observationProbabilitiesInAction.push_back(actionObservationProbabilities);
nextBelievesInAction.push_back(actionObservationBelieves); nextBelievesInAction.push_back(actionObservationBelieves);
@ -67,24 +77,18 @@ namespace storm {
nextBelieves.push_back(nextBelievesInAction); nextBelieves.push_back(nextBelievesInAction);
} }
} }
STORM_LOG_DEBUG("End of Section 1");
// Value Iteration // Value Iteration
auto cc = storm::utility::ConstantsComparator<ValueType>(); auto cc = storm::utility::ConstantsComparator<ValueType>();
while (!finished && iteration < maxIterations) { while (!finished && iteration < maxIterations) {
STORM_PRINT("Iteration " << std::to_string(iteration) << std::endl);
STORM_LOG_DEBUG("Iteration " << iteration + 1);
bool improvement = false; bool improvement = false;
for (size_t i = 0; i < beliefGrid.size(); ++i) { for (size_t i = 0; i < beliefGrid.size(); ++i) {
bool isTarget = beliefIsKnown[i]; bool isTarget = beliefIsKnown[i];
if (!isTarget) { if (!isTarget) {
Belief<ValueType> currentBelief = beliefGrid[i];
STORM_LOG_DEBUG(
"Check Belief " << currentBelief.id << ": ||" << currentBelief.observation << "|"
<< currentBelief.probabilities << "||");
storm::pomdp::Belief<ValueType> currentBelief = beliefGrid[i];
// we can take any state with the observation as they have the same number of choices // we can take any state with the observation as they have the same number of choices
uint64_t numChoices = pomdp.getNumberOfChoices( uint64_t numChoices = pomdp.getNumberOfChoices(
pomdp.getStatesWithObservation(currentBelief.observation).front()); pomdp.getStatesWithObservation(currentBelief.observation).front());
STORM_LOG_DEBUG("Number choices: " << std::to_string(numChoices));
// Initialize the values for the value iteration // Initialize the values for the value iteration
ValueType chosenValue = min ? storm::utility::infinity<ValueType>() ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
: -storm::utility::infinity<ValueType>(); : -storm::utility::infinity<ValueType>();
@ -96,8 +100,7 @@ namespace storm {
for (auto iter = observationProbabilities[i][action].begin(); for (auto iter = observationProbabilities[i][action].begin();
iter != observationProbabilities[i][action].end(); ++iter) { iter != observationProbabilities[i][action].end(); ++iter) {
uint32_t observation = iter->first; uint32_t observation = iter->first;
Belief<ValueType> nextBelief = nextBelieves[i][action][observation];
storm::pomdp::Belief<ValueType> nextBelief = beliefList[nextBelieves[i][action][observation]];
// compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas( std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
nextBelief.probabilities, gridResolution); nextBelief.probabilities, gridResolution);
@ -108,7 +111,7 @@ namespace storm {
for (size_t j = 0; j < lambdas.size(); ++j) { for (size_t j = 0; j < lambdas.size(); ++j) {
if (lambdas[j] != storm::utility::zero<ValueType>()) { if (lambdas[j] != storm::utility::zero<ValueType>()) {
sum += lambdas[j] * result_backup.at( sum += lambdas[j] * result_backup.at(
getBeliefIdInGrid(beliefGrid, observation, subSimplex[j]));
getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
} }
} }
currentValue += iter->second * sum; currentValue += iter->second * sum;
@ -124,22 +127,12 @@ namespace storm {
// TODO tie breaker? // TODO tie breaker?
} }
result[currentBelief.id] = chosenValue; result[currentBelief.id] = chosenValue;
chosenActions[currentBelief.id] = chosenActionIndex;
// Check if the iteration brought an improvement // Check if the iteration brought an improvement
if ((min && cc.isLess(storm::utility::zero<ValueType>(), if ((min && cc.isLess(storm::utility::zero<ValueType>(),
result_backup[currentBelief.id] - result[currentBelief.id])) || result_backup[currentBelief.id] - result[currentBelief.id])) ||
(!min && cc.isLess(storm::utility::zero<ValueType>(), (!min && cc.isLess(storm::utility::zero<ValueType>(),
result[currentBelief.id] - result_backup[currentBelief.id]))) { result[currentBelief.id] - result_backup[currentBelief.id]))) {
if (min) {
STORM_PRINT("Old: " << result_backup[currentBelief.id] << ", New: "
<< result[currentBelief.id] << std::endl << "Delta: "
<< result_backup[currentBelief.id] - result[currentBelief.id]
<< std::endl);
} else {
STORM_PRINT("Old: " << result_backup[currentBelief.id] << ", New: "
<< result[currentBelief.id] << std::endl << "Delta: "
<< result_backup[currentBelief.id] - result[currentBelief.id]
<< std::endl);
}
improvement = true; improvement = true;
} }
} }
@ -152,32 +145,120 @@ namespace storm {
++iteration; ++iteration;
} }
// maybe change this so the initial Belief always has ID 0
Belief<ValueType> initialBelief = getInitialBelief(pomdp, nextId);
++nextId;
STORM_PRINT("Grid approximation took " << iteration << " iterations" << std::endl);
beliefGrid.push_back(initialBelief);
beliefIsKnown.push_back(
target_observations.find(initialBelief.observation) != target_observations.end());
std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas( std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
initialBelief.probabilities, gridResolution); initialBelief.probabilities, gridResolution);
std::vector<std::vector<ValueType>> initSubSimplex = temp.first;
std::vector<ValueType> initLambdas = temp.second;
auto overApprox = storm::utility::zero<ValueType>();
for (size_t j = 0; j < initLambdas.size(); ++j) {
if (initLambdas[j] != storm::utility::zero<ValueType>()) {
overApprox += initLambdas[j] *
result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation,
initSubSimplex[j])];
}
}
// Now onto the under-approximation
/* All this has to be put into a separate function as we have to repeat it for other believes not in the grid
// first compute some things for the initial belief
std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction;
std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction;
uint64_t initialNumChoices = pomdp.getNumberOfChoices(
pomdp.getStatesWithObservation(initialBelief.observation).front());
for (uint64_t action = 0; action < initialNumChoices; ++action) {
std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
pomdp, initialBelief, action);
std::map<uint32_t, uint64_t> actionObservationBelieves;
for (auto iter = actionObservationProbabilities.begin();
iter != actionObservationProbabilities.end(); ++iter) {
uint32_t observation = iter->first;
actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp,
beliefList,
initialBelief,
action,
observation,
nextId);
}
observationProbabilitiesInAction.push_back(actionObservationProbabilities);
nextBelievesInAction.push_back(actionObservationBelieves);
}
observationProbabilities.push_back(observationProbabilitiesInAction);
nextBelieves.push_back(nextBelievesInAction);
// do one step here to get the best action in the initial state
ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
: -storm::utility::infinity<ValueType>();
uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
ValueType currentValue;
for (uint64_t action = 0; action < initialNumChoices; ++action) {
currentValue = storm::utility::zero<ValueType>(); // simply change this for rewards?
for (auto iter = observationProbabilities[initialBelief.id][action].begin();
iter != observationProbabilities[initialBelief.id][action].end(); ++iter) {
uint32_t observation = iter->first;
storm::pomdp::Belief<ValueType> nextBelief = beliefList[nextBelieves[initialBelief.id][action][observation]];
// compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
temp = computeSubSimplexAndLambdas(
nextBelief.probabilities, gridResolution);
std::vector<std::vector<ValueType>> subSimplex = temp.first; std::vector<std::vector<ValueType>> subSimplex = temp.first;
std::vector<ValueType> lambdas = temp.second; std::vector<ValueType> lambdas = temp.second;
ValueType overApprox = storm::utility::zero<ValueType>();
ValueType sum = storm::utility::zero<ValueType>();
for (size_t j = 0; j < lambdas.size(); ++j) { for (size_t j = 0; j < lambdas.size(); ++j) {
if (lambdas[j] != storm::utility::zero<ValueType>()) { if (lambdas[j] != storm::utility::zero<ValueType>()) {
overApprox += lambdas[j] *
result_backup[getBeliefIdInGrid(beliefGrid, initialBelief.observation,
subSimplex[j])];
sum += lambdas[j] * result.at(
getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
}
} }
currentValue += iter->second * sum;
} }
// Update the selected actions
if ((min && cc.isLess(storm::utility::zero<ValueType>(), chosenValue - currentValue)) ||
(!min &&
cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue))) {
chosenValue = currentValue;
chosenActionIndex = action;
}
}
chosenActions[initialBelief.id] = chosenActionIndex;*/
std::set<uint64_t> exploredBelieves;
std::deque<uint64_t> believesToBeExplored;
exploredBelieves.insert(initialBelief.id);
believesToBeExplored.push_back(initialBelief.id);
while (!believesToBeExplored.empty()) {
auto currentBeliefId = believesToBeExplored.front();
if (chosenActions.find(currentBeliefId) != chosenActions.end()) {
} else {
}
believesToBeExplored.pop_front();
}
STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
} }
template<typename ValueType, typename RewardModelType> template<typename ValueType, typename RewardModelType>
uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefIdInGrid(
std::vector<Belief<ValueType>> &grid, uint32_t observation, std::vector<ValueType> probabilities) {
uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefIdInVector(
std::vector<storm::pomdp::Belief<ValueType>> &grid, uint32_t observation,
std::vector<ValueType> probabilities) {
for (auto const &belief : grid) { for (auto const &belief : grid) {
if (belief.observation == observation && probabilities.size() == belief.probabilities.size()) {
if (belief.observation == observation) {
if (belief.probabilities == probabilities) { if (belief.probabilities == probabilities) {
STORM_LOG_DEBUG("Found belief with id " << std::to_string(belief.id)); STORM_LOG_DEBUG("Found belief with id " << std::to_string(belief.id));
return belief.id; return belief.id;
@ -189,7 +270,7 @@ namespace storm {
} }
template<typename ValueType, typename RewardModelType> template<typename ValueType, typename RewardModelType>
Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getInitialBelief(
storm::pomdp::Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getInitialBelief(
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t id) { storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t id) {
STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
"POMDP contains more than one initial state"); "POMDP contains more than one initial state");
@ -203,16 +284,18 @@ namespace storm {
observation = pomdp.getObservation(state); observation = pomdp.getObservation(state);
} }
} }
return Belief<ValueType>{id, observation, distribution};
return storm::pomdp::Belief<ValueType>{id, observation, distribution};
} }
template<typename ValueType, typename RewardModelType> template<typename ValueType, typename RewardModelType>
void ApproximatePOMDPModelchecker<ValueType, RewardModelType>::constructBeliefGrid( void ApproximatePOMDPModelchecker<ValueType, RewardModelType>::constructBeliefGrid(
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::set<uint32_t> target_observations, uint64_t gridResolution, std::set<uint32_t> target_observations, uint64_t gridResolution,
std::vector<Belief<ValueType>> &grid, std::vector<bool> &beliefIsKnown) {
std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<storm::pomdp::Belief<ValueType>> &grid, std::vector<bool> &beliefIsKnown,
uint64_t nextId) {
bool isTarget; bool isTarget;
uint64_t newId = 0;
uint64_t newId = nextId;
for (uint32_t observation = 0; observation < pomdp.getNrObservations(); ++observation) { for (uint32_t observation = 0; observation < pomdp.getNrObservations(); ++observation) {
std::vector<uint64_t> statesWithObservation = pomdp.getStatesWithObservation(observation); std::vector<uint64_t> statesWithObservation = pomdp.getStatesWithObservation(observation);
@ -224,10 +307,11 @@ namespace storm {
std::vector<ValueType> distribution(pomdp.getNumberOfStates(), std::vector<ValueType> distribution(pomdp.getNumberOfStates(),
storm::utility::zero<ValueType>()); storm::utility::zero<ValueType>());
distribution[statesWithObservation.front()] = storm::utility::one<ValueType>(); distribution[statesWithObservation.front()] = storm::utility::one<ValueType>();
Belief<ValueType> belief = {newId, observation, distribution};
storm::pomdp::Belief<ValueType> belief = {newId, observation, distribution};
STORM_LOG_TRACE( STORM_LOG_TRACE(
"Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << "),"
<< distribution << "]"); << distribution << "]");
beliefList.push_back(belief);
grid.push_back(belief); grid.push_back(belief);
beliefIsKnown.push_back(isTarget); beliefIsKnown.push_back(isTarget);
++newId; ++newId;
@ -251,10 +335,11 @@ namespace storm {
helper[statesWithObservation.size() - 1] / helper[statesWithObservation.size() - 1] /
storm::utility::convertNumber<ValueType>(gridResolution); storm::utility::convertNumber<ValueType>(gridResolution);
Belief<ValueType> belief = {newId, observation, distribution};
storm::pomdp::Belief<ValueType> belief = {newId, observation, distribution};
STORM_LOG_TRACE( STORM_LOG_TRACE(
"Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation)
<< ")," << distribution << "]"); << ")," << distribution << "]");
beliefList.push_back(belief);
grid.push_back(belief); grid.push_back(belief);
beliefIsKnown.push_back(isTarget); beliefIsKnown.push_back(isTarget);
if (helper[statesWithObservation.size() - 1] == if (helper[statesWithObservation.size() - 1] ==
@ -353,7 +438,8 @@ namespace storm {
template<typename ValueType, typename RewardModelType> template<typename ValueType, typename RewardModelType>
std::map<uint32_t, ValueType> std::map<uint32_t, ValueType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeObservationProbabilitiesAfterAction( ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeObservationProbabilitiesAfterAction(
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, Belief<ValueType> belief,
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
storm::pomdp::Belief<ValueType> belief,
uint64_t actionIndex) { uint64_t actionIndex) {
std::map<uint32_t, ValueType> res; std::map<uint32_t, ValueType> res;
// the id is not important here as we immediately discard the belief (very hacky, I don't like it either) // the id is not important here as we immediately discard the belief (very hacky, I don't like it either)
@ -373,8 +459,10 @@ namespace storm {
} }
template<typename ValueType, typename RewardModelType> template<typename ValueType, typename RewardModelType>
Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterAction(
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, Belief<ValueType> belief,
storm::pomdp::Belief<ValueType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterAction(
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
storm::pomdp::Belief<ValueType> belief,
uint64_t actionIndex, uint64_t id) { uint64_t actionIndex, uint64_t id) {
std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>()); std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
uint32_t observation = 0; uint32_t observation = 0;
@ -388,22 +476,13 @@ namespace storm {
} }
} }
} }
/* Should not be necessary
// We have to normalize the distribution
auto sum = storm::utility::zero<ValueType>();
for(ValueType const& entry : distributionAfter){
sum += entry;
}
for(size_t i = 0; i < pomdp.getNumberOfStates(); ++i){
distributionAfter[i] /= sum;
}*/
return Belief<ValueType>{id, observation, distributionAfter};
return storm::pomdp::Belief<ValueType>{id, observation, distributionAfter};
} }
template<typename ValueType, typename RewardModelType> template<typename ValueType, typename RewardModelType>
Belief<ValueType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterActionAndObservation(
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, Belief<ValueType> belief,
uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterActionAndObservation(
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::vector<storm::pomdp::Belief<ValueType>> &beliefList, storm::pomdp::Belief<ValueType> belief,
uint64_t actionIndex, uint32_t observation, uint64_t id) { uint64_t actionIndex, uint32_t observation, uint64_t id) {
std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>()); std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
@ -425,7 +504,12 @@ namespace storm {
for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) { for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) {
distributionAfter[i] /= sum; distributionAfter[i] /= sum;
} }
return Belief<ValueType>{id, observation, distributionAfter};
if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) {
return getBeliefIdInVector(beliefList, observation, distributionAfter);
} else {
beliefList.push_back(storm::pomdp::Belief<ValueType>{id, observation, distributionAfter});
return id;
}
} }

35
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h

@ -2,21 +2,13 @@
#include "storm/modelchecker/CheckTask.h" #include "storm/modelchecker/CheckTask.h"
#include "storm/models/sparse/Pomdp.h" #include "storm/models/sparse/Pomdp.h"
#include "storm/utility/logging.h" #include "storm/utility/logging.h"
#include "storm-pomdp/storage/Belief.h"
namespace storm { namespace storm {
namespace pomdp { namespace pomdp {
namespace modelchecker { namespace modelchecker {
class POMDPCheckResult; class POMDPCheckResult;
// Structure used to represent a belief
template<typename ValueType>
struct Belief {
uint64_t id;
uint32_t observation;
//TODO make this sparse?
std::vector<ValueType> probabilities;
};
template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel <ValueType>> template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel <ValueType>>
class ApproximatePOMDPModelchecker { class ApproximatePOMDPModelchecker {
public: public:
@ -38,7 +30,7 @@ namespace storm {
* @param id * @param id
* @return * @return
*/ */
Belief<ValueType>
storm::pomdp::Belief<ValueType>
getInitialBelief(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t id); getInitialBelief(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t id);
@ -61,7 +53,9 @@ namespace storm {
*/ */
void constructBeliefGrid(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, void constructBeliefGrid(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::set<uint32_t> target_observations, uint64_t gridResolution, std::set<uint32_t> target_observations, uint64_t gridResolution,
std::vector<Belief<ValueType>> &grid, std::vector<bool> &beliefIsKnown);
std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<storm::pomdp::Belief<ValueType>> &grid,
std::vector<bool> &beliefIsKnown, uint64_t nextId);
/** /**
@ -73,11 +67,13 @@ namespace storm {
* @return * @return
*/ */
std::map<uint32_t, ValueType> computeObservationProbabilitiesAfterAction( std::map<uint32_t, ValueType> computeObservationProbabilitiesAfterAction(
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, Belief<ValueType> belief,
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
storm::pomdp::Belief<ValueType> belief,
uint64_t actionIndex); uint64_t actionIndex);
/** /**
* Helper method to get the next belief that results from a belief by performing an action and observing an observation
* Helper method to get the id of the next belief that results from a belief by performing an action and observing an observation.
* If the belief does not exist yet, it is created and added to the list of all beliefs
* *
* @param pomdp the POMDP on which the evaluation should be performed * @param pomdp the POMDP on which the evaluation should be performed
* @param belief the starting belief * @param belief the starting belief
@ -85,9 +81,10 @@ namespace storm {
* @param observation the observation after the action was performed * @param observation the observation after the action was performed
* @return the resulting belief (observation and distribution) * @return the resulting belief (observation and distribution)
*/ */
Belief<ValueType>
uint64_t
getBeliefAfterActionAndObservation(const models::sparse::Pomdp <ValueType, RewardModelType> &pomdp, getBeliefAfterActionAndObservation(const models::sparse::Pomdp <ValueType, RewardModelType> &pomdp,
Belief<ValueType> belief,
std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
storm::pomdp::Belief<ValueType> belief,
uint64_t actionIndex, uint32_t observation, uint64_t id); uint64_t actionIndex, uint32_t observation, uint64_t id);
/** /**
@ -98,18 +95,18 @@ namespace storm {
* @param actionIndex * @param actionIndex
* @return * @return
*/ */
Belief<ValueType>
storm::pomdp::Belief<ValueType>
getBeliefAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, getBeliefAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
Belief<ValueType> belief, uint64_t actionIndex, uint64_t id);
storm::pomdp::Belief<ValueType> belief, uint64_t actionIndex, uint64_t id);
/** /**
* Helper to get the id of a Belief in the grid
* Helper to get the id of a Belief stored in a given vector structure
* *
* @param observation * @param observation
* @param probabilities * @param probabilities
* @return * @return
*/ */
uint64_t getBeliefIdInGrid(std::vector<Belief<ValueType>> &grid, uint32_t observation,
uint64_t getBeliefIdInVector(std::vector<storm::pomdp::Belief<ValueType>> &grid, uint32_t observation,
std::vector<ValueType> probabilities); std::vector<ValueType> probabilities);
}; };

12
src/storm-pomdp/storage/Belief.h

@ -0,0 +1,12 @@
namespace storm {
namespace pomdp {
// Structure used to represent a belief
template<typename ValueType>
struct Belief {
uint64_t id;
uint32_t observation;
//TODO make this sparse?
std::vector<ValueType> probabilities;
};
}
}
Loading…
Cancel
Save