Alexander Bork
5 years ago
5 changed files with 566 additions and 4 deletions
-
16src/storm-pomdp-cli/storm-pomdp.cpp
-
419src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
-
118src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
-
13src/storm/models/sparse/Pomdp.cpp
-
2src/storm/models/sparse/Pomdp.h
@ -0,0 +1,419 @@ |
|||
#include <storm/utility/ConstantsComparator.h>
|
|||
#include "ApproximatePOMDPModelchecker.h"
|
|||
#include "storm/utility/vector.h"
|
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
namespace modelchecker { |
|||
template<typename ValueType, typename RewardModelType> |
|||
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker() { |
|||
//Intentionally left empty
|
|||
} |
|||
|
|||
template<typename ValueType, typename RewardModelType> |
|||
/*std::unique_ptr<POMDPCheckResult>*/ void |
|||
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbability( |
|||
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, |
|||
std::set<uint32_t> target_observations, bool min, uint64_t gridResolution) { |
|||
//TODO add timing
|
|||
uint64_t maxIterations = 100; |
|||
bool finished = false; |
|||
uint64_t iteration = 0; |
|||
|
|||
|
|||
std::vector<Belief<ValueType>> beliefGrid; |
|||
std::vector<bool> beliefIsKnown; |
|||
constructBeliefGrid(pomdp, target_observations, gridResolution, beliefGrid, beliefIsKnown); |
|||
std::map<uint64_t, ValueType> result; |
|||
std::map<uint64_t, ValueType> result_backup; |
|||
|
|||
std::vector<std::vector<std::map<uint32_t, ValueType>>> observationProbabilities; |
|||
std::vector<std::vector<std::map<uint32_t, Belief<ValueType>>>> nextBelieves; |
|||
|
|||
uint64_t nextId = beliefGrid.size(); |
|||
for (size_t i = 0; i < beliefGrid.size(); ++i) { |
|||
auto currentBelief = beliefGrid[i]; |
|||
bool isTarget = beliefIsKnown[i]; |
|||
if (isTarget) { |
|||
result.emplace(std::make_pair(currentBelief.id, storm::utility::one<ValueType>())); |
|||
result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::one<ValueType>())); |
|||
} else { |
|||
result.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>())); |
|||
result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>())); |
|||
|
|||
std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction; |
|||
std::vector<std::map<uint32_t, Belief<ValueType>>> nextBelievesInAction; |
|||
|
|||
uint64_t numChoices = pomdp.getNumberOfChoices( |
|||
pomdp.getStatesWithObservation(currentBelief.observation).front()); |
|||
for (uint64_t action = 0; action < numChoices; ++action) { |
|||
std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction( |
|||
pomdp, currentBelief, action); |
|||
std::map<uint32_t, Belief<ValueType>> actionObservationBelieves; |
|||
for (auto iter = actionObservationProbabilities.begin(); |
|||
iter != actionObservationProbabilities.end(); ++iter) { |
|||
uint32_t observation = iter->first; |
|||
actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, |
|||
currentBelief, |
|||
action, |
|||
observation, |
|||
nextId); |
|||
++nextId; |
|||
} |
|||
observationProbabilitiesInAction.push_back(actionObservationProbabilities); |
|||
nextBelievesInAction.push_back(actionObservationBelieves); |
|||
} |
|||
observationProbabilities.push_back(observationProbabilitiesInAction); |
|||
nextBelieves.push_back(nextBelievesInAction); |
|||
} |
|||
} |
|||
|
|||
// Value Iteration
|
|||
while (!finished && iteration < maxIterations) { |
|||
STORM_LOG_DEBUG("Iteration " << std::to_string(iteration)); |
|||
bool improvement = false; |
|||
for (size_t i = 0; i < beliefGrid.size(); ++i) { |
|||
bool isTarget = beliefIsKnown[i]; |
|||
if (!isTarget) { |
|||
Belief<ValueType> currentBelief = beliefGrid[i]; |
|||
// we can take any state with the observation as they have the same number of choices
|
|||
uint64_t numChoices = pomdp.getNumberOfChoices( |
|||
pomdp.getStatesWithObservation(currentBelief.observation).front()); |
|||
|
|||
// Initialize the values for the value iteration
|
|||
ValueType bestValue = min ? storm::utility::infinity<ValueType>() |
|||
: -storm::utility::infinity<ValueType>(); |
|||
uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity(); |
|||
ValueType currentValue; |
|||
|
|||
for (uint64_t action = 0; action < numChoices; ++action) { |
|||
currentValue = storm::utility::zero<ValueType>(); // simply change this for rewards?
|
|||
|
|||
for (auto iter = observationProbabilities[i][action].begin(); |
|||
iter != observationProbabilities[i][action].end(); ++iter) { |
|||
uint32_t observation = iter->first; |
|||
Belief<ValueType> nextBelief = nextBelieves[i][action][observation]; |
|||
|
|||
// compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
|
|||
std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas( |
|||
currentBelief.probabilities, gridResolution); |
|||
std::vector<std::vector<ValueType>> subSimplex = temp.first; |
|||
std::vector<ValueType> lambdas = temp.second; |
|||
|
|||
ValueType sum = storm::utility::zero<ValueType>(); |
|||
for (size_t j = 0; j < lambdas.size(); ++j) { |
|||
if (lambdas[j] != storm::utility::zero<ValueType>()) { |
|||
sum += lambdas[j] * result_backup.at( |
|||
getBeliefIdInGrid(beliefGrid, observation, subSimplex[j])); |
|||
} |
|||
} |
|||
|
|||
currentValue += iter->second * sum; |
|||
} |
|||
|
|||
// Update the selected actions
|
|||
auto cc = storm::utility::ConstantsComparator<ValueType>(); |
|||
if ((min && cc.isLess(storm::utility::zero<ValueType>(), bestValue - currentValue)) || |
|||
(!min && cc.isLess(storm::utility::zero<ValueType>(), currentValue - bestValue))) { |
|||
improvement = true; |
|||
bestValue = currentValue; |
|||
chosenActionIndex = action; |
|||
} |
|||
// TODO tie breaker?
|
|||
} |
|||
result[currentBelief.id] = bestValue; |
|||
} |
|||
} |
|||
finished = !improvement; |
|||
// back up
|
|||
for (auto iter = result.begin(); iter != result.end(); ++iter) { |
|||
result_backup[iter->first] = result[iter->first]; |
|||
} |
|||
++iteration; |
|||
} |
|||
|
|||
// maybe change this so the initial Belief always has ID 0
|
|||
Belief<ValueType> initialBelief = getInitialBelief(pomdp, nextId); |
|||
++nextId; |
|||
|
|||
std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas( |
|||
initialBelief.probabilities, gridResolution); |
|||
std::vector<std::vector<ValueType>> subSimplex = temp.first; |
|||
std::vector<ValueType> lambdas = temp.second; |
|||
|
|||
ValueType overApprox = storm::utility::zero<ValueType>(); |
|||
for (size_t j = 0; j < lambdas.size(); ++j) { |
|||
if (lambdas[j] != storm::utility::zero<ValueType>()) { |
|||
overApprox += lambdas[j] * |
|||
result_backup[getBeliefIdInGrid(beliefGrid, initialBelief.observation, |
|||
subSimplex[j])]; |
|||
} |
|||
} |
|||
|
|||
STORM_LOG_DEBUG("Over-Approximation Result: " << overApprox); |
|||
} |
|||
|
|||
template<typename ValueType, typename RewardModelType> |
|||
uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefIdInGrid( |
|||
std::vector<Belief<ValueType>> &grid, uint32_t observation, std::vector<ValueType> probabilities) { |
|||
for (auto const &belief : grid) { |
|||
if (belief.observation == observation && probabilities.size() == belief.probabilities.size()) { |
|||
if (belief.probabilities == probabilities) { |
|||
STORM_LOG_DEBUG("Found belief with id " << std::to_string(belief.id)); |
|||
return belief.id; |
|||
} |
|||
} |
|||
} |
|||
STORM_LOG_DEBUG("Did not find the belief in the grid"); |
|||
return -1; |
|||
} |
|||
|
|||
template<typename ValueType, typename RewardModelType> |
|||
Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getInitialBelief( |
|||
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t id) { |
|||
STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() > 1, |
|||
"POMDP contains more than one initial state"); |
|||
std::vector<ValueType> distribution(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>()); |
|||
uint32_t observation = 0; |
|||
for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { |
|||
if (pomdp.getInitialStates()[state] == 1) { |
|||
distribution[state] = storm::utility::one<ValueType>(); |
|||
observation = pomdp.getObservation(state); |
|||
} |
|||
} |
|||
return Belief<ValueType>{id, observation, distribution}; |
|||
} |
|||
|
|||
template<typename ValueType, typename RewardModelType>
void ApproximatePOMDPModelchecker<ValueType, RewardModelType>::constructBeliefGrid(
        storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
        std::set<uint32_t> target_observations, uint64_t gridResolution,
        std::vector<Belief<ValueType>> &grid, std::vector<bool> &beliefIsKnown) {
    // For every observation, enumerates all beliefs whose probabilities over the states with
    // that observation lie on a grid with denominator `gridResolution`. Appends each belief to
    // `grid` and, in lockstep, whether its observation is a target to `beliefIsKnown`.
    bool isTarget;
    uint64_t newId = 0;  // running belief id, consecutive over the whole grid

    for (uint32_t observation = 0; observation < pomdp.getNrObservations(); ++observation) {
        std::vector<uint64_t> statesWithObservation = pomdp.getStatesWithObservation(observation);
        isTarget = target_observations.find(observation) != target_observations.end();

        // TODO this can probably be condensed
        if (statesWithObservation.size() == 1) {
            // If there is only one state with the observation, we can directly add the corresponding belief
            std::vector<ValueType> distribution(pomdp.getNumberOfStates(),
                                                storm::utility::zero<ValueType>());
            distribution[statesWithObservation.front()] = storm::utility::one<ValueType>();
            Belief<ValueType> belief = {newId, observation, distribution};
            STORM_LOG_TRACE(
                    "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << "),"
                                  << distribution << "]");
            grid.push_back(belief);
            beliefIsKnown.push_back(isTarget);
            ++newId;
        } else {
            // Otherwise we have to enumerate all possible distributions with regards to the grid
            // helper is used to derive the distribution of the belief
            // Invariant: helper is non-increasing, helper[0] == gridResolution, and consecutive
            // differences (scaled by 1/gridResolution) yield the probabilities.
            std::vector<uint64_t> helper(statesWithObservation.size(), 0);
            helper[0] = gridResolution;
            bool done = false;
            uint64_t index = 0;

            while (!done) {
                // Translate the current helper vector into a probability distribution over all
                // POMDP states (zero outside statesWithObservation).
                std::vector<ValueType> distribution(pomdp.getNumberOfStates(),
                                                    storm::utility::zero<ValueType>());
                for (size_t i = 0; i < statesWithObservation.size() - 1; ++i) {
                    distribution[statesWithObservation[i]] = ValueType(
                            double(helper[i] - helper[i + 1]) / gridResolution);
                }
                distribution[statesWithObservation.back()] = ValueType(
                        double(helper[statesWithObservation.size() - 1]) / gridResolution);

                Belief<ValueType> belief = {newId, observation, distribution};
                STORM_LOG_TRACE(
                        "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation)
                                      << ")," << distribution << "]");
                grid.push_back(belief);
                beliefIsKnown.push_back(isTarget);
                if (helper[statesWithObservation.size() - 1] == gridResolution) {
                    // If the last entry of helper is the gridResolution, we have enumerated all necessary distributions
                    done = true;
                } else {
                    // Update helper by finding the index to increment
                    // (rightmost position whose value differs from its left neighbour).
                    index = statesWithObservation.size() - 1;
                    while (helper[index] == helper[index - 1]) {
                        --index;
                    }
                    STORM_LOG_ASSERT(index > 0, "Error in BeliefGrid generation - index wrong");
                    // Increment the value at the index
                    ++helper[index];
                    // Reset all indices greater than the changed one to 0
                    ++index;
                    while (index < statesWithObservation.size()) {
                        helper[index] = 0;
                        ++index;
                    }
                }
                ++newId;
            }
        }
    }
}
|||
|
|||
template<typename ValueType, typename RewardModelType> |
|||
std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> |
|||
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeSubSimplexAndLambdas( |
|||
std::vector<ValueType> probabilities, uint64_t resolution) { |
|||
// This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
|
|||
// Variable names are based on the paper
|
|||
|
|||
std::vector<ValueType> x(probabilities.size(), storm::utility::zero<ValueType>()); |
|||
std::vector<ValueType> v(probabilities.size(), storm::utility::zero<ValueType>()); |
|||
std::vector<ValueType> d(probabilities.size(), storm::utility::zero<ValueType>()); |
|||
|
|||
for (size_t i = 0; i < probabilities.size(); ++i) { |
|||
for (size_t j = i; j < probabilities.size(); ++j) { |
|||
x[i] += storm::utility::convertNumber<ValueType>(resolution) * probabilities[j]; |
|||
} |
|||
v[i] = storm::utility::floor(x[i]); |
|||
d[i] = x[i] - v[i]; |
|||
} |
|||
|
|||
auto p = storm::utility::vector::getSortedIndices(d); |
|||
|
|||
std::vector<std::vector<ValueType>> qs; |
|||
for (size_t i = 0; i < probabilities.size(); ++i) { |
|||
std::vector<ValueType> q; |
|||
if (i == 0) { |
|||
for (size_t j = 0; j < probabilities.size(); ++j) { |
|||
q[i] = v[i]; |
|||
} |
|||
qs.push_back(q); |
|||
} else { |
|||
for (size_t j = 0; j < probabilities.size(); ++j) { |
|||
if (j == p[i - 1]) { |
|||
q[j] = qs[i - 1][j] + storm::utility::one<ValueType>(); |
|||
} else { |
|||
q[j] = qs[i - 1][j]; |
|||
} |
|||
} |
|||
qs.push_back(q); |
|||
} |
|||
} |
|||
|
|||
std::vector<std::vector<ValueType>> subSimplex; |
|||
for (auto q : qs) { |
|||
std::vector<ValueType> node; |
|||
for (size_t i = 0; i < probabilities.size(); ++i) { |
|||
if (i != probabilities.size() - 1) { |
|||
node.push_back((q[i] - q[i + 1]) / storm::utility::convertNumber<ValueType>(resolution)); |
|||
} else { |
|||
node.push_back(q[i] / storm::utility::convertNumber<ValueType>(resolution)); |
|||
} |
|||
} |
|||
subSimplex.push_back(node); |
|||
} |
|||
|
|||
std::vector<ValueType> lambdas(probabilities.size(), storm::utility::zero<ValueType>()); |
|||
auto sum = storm::utility::zero<ValueType>(); |
|||
for (size_t i = 1; i < probabilities.size(); ++i) { |
|||
lambdas[i] = d[p[i - 1]] - d[p[i]]; |
|||
sum += d[p[i - 1]] - d[p[i]]; |
|||
} |
|||
lambdas[0] = storm::utility::one<ValueType>() - sum; |
|||
|
|||
//TODO add assertion that we are close enough
|
|||
return std::make_pair(subSimplex, lambdas); |
|||
} |
|||
|
|||
|
|||
template<typename ValueType, typename RewardModelType> |
|||
std::map<uint32_t, ValueType> |
|||
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeObservationProbabilitiesAfterAction( |
|||
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, Belief<ValueType> belief, |
|||
uint64_t actionIndex) { |
|||
std::map<uint32_t, ValueType> res; |
|||
// the id is not important here as we immediately discard the belief (very hacky, I don't like it either)
|
|||
std::vector<ValueType> postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex, |
|||
0).probabilities; |
|||
for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { |
|||
uint32_t observation = pomdp.getObservation(state); |
|||
if (postProbabilities[state] != storm::utility::zero<ValueType>()) { |
|||
if (res.count(observation) == 0) { |
|||
res[observation] = postProbabilities[state]; |
|||
} else { |
|||
res[observation] += postProbabilities[state]; |
|||
} |
|||
} |
|||
} |
|||
return res; |
|||
} |
|||
|
|||
template<typename ValueType, typename RewardModelType>
Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterAction(
        storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, Belief<ValueType> belief,
        uint64_t actionIndex, uint64_t id) {
    // Computes the state distribution after taking actionIndex in `belief`, without
    // conditioning on an observation (so no normalization is needed: total mass stays 1).
    std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
    uint32_t observation = 0;
    for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
        if (belief.probabilities[state] != storm::utility::zero<ValueType>()) {
            auto row = pomdp.getTransitionMatrix().getRow(
                    pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
            for (auto const &entry : row) {
                // NOTE(review): `observation` is overwritten on every entry, so the returned
                // belief carries the observation of the LAST successor visited. The only caller
                // in view (computeObservationProbabilitiesAfterAction) uses just .probabilities,
                // so this field appears to be don't-care here — confirm before relying on it.
                observation = pomdp.getObservation(entry.getColumn());
                distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
            }
        }
    }
    /* Should not be necessary
    // We have to normalize the distribution
    auto sum = storm::utility::zero<ValueType>();
    for(ValueType const& entry : distributionAfter){
        sum += entry;
    }
    for(size_t i = 0; i < pomdp.getNumberOfStates(); ++i){
        distributionAfter[i] /= sum;
    }*/
    return Belief<ValueType>{id, observation, distributionAfter};
}
|||
|
|||
template<typename ValueType, typename RewardModelType> |
|||
Belief<ValueType> |
|||
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterActionAndObservation( |
|||
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, Belief<ValueType> belief, |
|||
uint64_t actionIndex, uint32_t observation, uint64_t id) { |
|||
std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>()); |
|||
for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { |
|||
if (belief.probabilities[state] != storm::utility::zero<ValueType>()) { |
|||
auto row = pomdp.getTransitionMatrix().getRow( |
|||
pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); |
|||
for (auto const &entry : row) { |
|||
if (pomdp.getObservation(entry.getColumn()) == observation) { |
|||
distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
// We have to normalize the distribution
|
|||
auto sum = storm::utility::zero<ValueType>(); |
|||
for (ValueType const &entry : distributionAfter) { |
|||
sum += entry; |
|||
} |
|||
for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) { |
|||
distributionAfter[i] /= sum; |
|||
} |
|||
return Belief<ValueType>{id, observation, distributionAfter}; |
|||
} |
|||
|
|||
|
|||
// Explicit template instantiations for the supported value types.
template
class ApproximatePOMDPModelchecker<double>;

#ifdef STORM_HAVE_CARL

//template class ApproximatePOMDPModelchecker<storm::RationalFunction>;
template
class ApproximatePOMDPModelchecker<storm::RationalNumber>;

#endif
|
|||
} |
|||
} |
|||
} |
@ -0,0 +1,118 @@ |
|||
#pragma once

#include <cstdint>
#include <cstdlib>
#include <map>
#include <memory>
#include <set>
#include <vector>

#include "storm/modelchecker/CheckTask.h"
#include "storm/models/sparse/Pomdp.h"
#include "storm/utility/logging.h"
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
namespace modelchecker { |
|||
class POMDPCheckResult; |
|||
|
|||
// Structure used to represent a belief, i.e. a probability distribution over the POMDP states
// together with the observation shared by the states in its support.
template<typename ValueType>
struct Belief {
    // Unique identifier of this belief within the checker's belief space.
    uint64_t id;
    // Observation associated with the belief.
    uint32_t observation;
    //TODO make this sparse?
    // Probability assigned to each POMDP state, indexed by state id.
    std::vector<ValueType> probabilities;
};
|||
|
|||
/**
 * Model checker that approximates reachability properties of a POMDP by discretizing its
 * belief space with a fixed-resolution grid (Lovejoy-style approximation).
 */
template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel <ValueType>>
class ApproximatePOMDPModelchecker {
public:
    explicit ApproximatePOMDPModelchecker();

    /**
     * Approximates the min/max probability of reaching a target observation via value
     * iteration over a belief grid of the given resolution. Currently only logs the result
     * (hence the commented-out return type).
     *
     * @param pomdp the POMDP to check
     * @param target_observations observations whose beliefs count as reached
     * @param min true to minimize, false to maximize
     * @param gridResolution denominator of the belief grid discretization
     */
    /*std::unique_ptr<POMDPCheckResult>*/ void
    computeReachabilityProbability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                   std::set<uint32_t> target_observations, bool min,
                                   uint64_t gridResolution);

    /**
     * Reward analogue of computeReachabilityProbability.
     * NOTE(review): declared but no definition is visible in this change — confirm it exists.
     *
     * @param pomdp the POMDP to check
     * @param target_observations observations whose beliefs count as reached
     * @param gridResolution denominator of the belief grid discretization
     * @return the check result
     */
    std::unique_ptr<POMDPCheckResult>
    computeReachabilityReward(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                              std::set<uint32_t> target_observations, uint64_t gridResolution);

private:
    /**
     * Builds the belief of the POMDP's initial state (probability 1 on that state).
     *
     * @param pomdp the POMDP
     * @param id identifier to assign to the created belief
     * @return the initial belief
     */
    Belief<ValueType>
    getInitialBelief(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t id);


    /**
     * Freudenthal triangulation: determines the grid sub-simplex containing the given
     * distribution and the convex coefficients expressing it in terms of the corners.
     *
     * @param probabilities the distribution to triangulate
     * @param gridResolution denominator of the belief grid discretization
     * @return pair of (sub-simplex corner distributions, lambda coefficients)
     */
    std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>>
    computeSubSimplexAndLambdas(std::vector<ValueType> probabilities, uint64_t gridResolution);


    /**
     * Helper method to construct the grid of Belief states to approximate the POMDP
     *
     * @param pomdp the POMDP
     * @param target_observations observations whose beliefs are marked as known targets
     * @param gridResolution denominator of the belief grid discretization
     * @param grid output: the constructed beliefs
     * @param beliefIsKnown output: per belief, whether its observation is a target
     */
    void constructBeliefGrid(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                             std::set<uint32_t> target_observations, uint64_t gridResolution,
                             std::vector<Belief<ValueType>> &grid, std::vector<bool> &beliefIsKnown);


    /**
     * Helper method to get the probabilities of each observation after performing an action
     *
     * @param pomdp the POMDP
     * @param belief the belief in which the action is taken
     * @param actionIndex index of the action
     * @return map from observation to its probability after the action
     */
    std::map<uint32_t, ValueType> computeObservationProbabilitiesAfterAction(
            storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, Belief<ValueType> belief,
            uint64_t actionIndex);

    /**
     * Helper method to get the next belief that results from a belief by performing an action and observing an observation
     *
     * @param pomdp the POMDP on which the evaluation should be performed
     * @param belief the starting belief
     * @param actionIndex the index of the action to be performed
     * @param observation the observation after the action was performed
     * @param id identifier to assign to the resulting belief
     * @return the resulting belief (observation and distribution)
     */
    Belief<ValueType>
    getBeliefAfterActionAndObservation(const models::sparse::Pomdp <ValueType, RewardModelType> &pomdp,
                                       Belief<ValueType> belief,
                                       uint64_t actionIndex, uint32_t observation, uint64_t id);

    /**
     * Helper method to get the next belief that results from a belief by performing an action
     * (without conditioning on an observation).
     *
     * @param pomdp the POMDP
     * @param belief the starting belief
     * @param actionIndex index of the action
     * @param id identifier to assign to the resulting belief
     * @return the resulting belief
     */
    Belief<ValueType>
    getBeliefAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                         Belief<ValueType> belief, uint64_t actionIndex, uint64_t id);

    /**
     * Helper to get the id of a Belief in the grid
     *
     * @param grid the belief grid to search
     * @param observation the observation the belief must carry
     * @param probabilities the exact probability vector to match
     * @return the id of the matching grid belief; the wrapped value of -1 if none matches
     */
    uint64_t getBeliefIdInGrid(std::vector<Belief<ValueType>> &grid, uint32_t observation,
                               std::vector<ValueType> probabilities);
};
|||
|
|||
} |
|||
} |
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue