
Added BeliefMdpExplorer which does most of the work when exploring (triangulated Variants of) the BeliefMdp.

main
Tim Quatmann committed 5 years ago
commit ab26b69435
  1. src/storm-pomdp/builder/BeliefMdpExplorer.h (354 changed lines)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (248 changed lines)
  3. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (9 changed lines)
  4. src/storm-pomdp/storage/BeliefManager.h (24 changed lines)
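
Before the file-by-file diff, here is the intended call sequence of the new explorer, as a sketch mirroring its use in computeFirstRefinementStep below. PomdpType, isTargetObservation, and resolutions are placeholders for caller-provided pieces and are not part of the new class:

// Sketch only: mirrors the usage in computeFirstRefinementStep below.
// 'PomdpType', 'isTargetObservation', and 'resolutions' are placeholders.
storm::builder::BeliefMdpExplorer<PomdpType> explorer(beliefManager, lowerBounds, upperBounds);
explorer.startNewExploration(); // for rewards, pass a value for the extra target state
while (explorer.hasUnexploredState()) {
    auto beliefId = explorer.exploreNextState();
    if (isTargetObservation(beliefManager->getBeliefObservation(beliefId))) {
        explorer.setCurrentStateIsTarget();
        explorer.addSelfloopTransition();
    } else {
        uint64_t numActions = beliefManager->getBeliefNumberOfChoices(beliefId);
        for (uint64_t action = 0; action < numActions; ++action) {
            for (auto const& successor : beliefManager->expandAndTriangulate(beliefId, action, resolutions)) {
                explorer.addTransitionToBelief(action, successor.first, successor.second, false);
            }
        }
    }
}
explorer.finishExploration();
explorer.computeValuesOfExploredMdp(storm::solver::OptimizationDirection::Maximize);
auto boundAtInit = explorer.getComputedValueAtInitialState();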

src/storm-pomdp/builder/BeliefMdpExplorer.h (354 changed lines)

@@ -0,0 +1,354 @@
#pragma once
#include <memory>
#include <vector>
#include <deque>
#include <map>
#include <boost/optional.hpp>
#include "storm/api/properties.h"
#include "storm/api/verification.h"
#include "storm/storage/BitVector.h"
#include "storm/utility/macros.h"
#include "storm-pomdp/storage/BeliefManager.h"
#include "storm/utility/SignalHandler.h"
namespace storm {
namespace builder {
template<typename PomdpType>
class BeliefMdpExplorer {
public:
typedef typename PomdpType::ValueType ValueType;
typedef storm::storage::BeliefManager<PomdpType> BeliefManagerType;
typedef typename BeliefManagerType::BeliefId BeliefId;
typedef uint64_t MdpStateType;
BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) {
// Intentionally left empty
}
void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
// Reset data from potential previous explorations
mdpStateToBeliefIdMap.clear();
beliefIdToMdpStateMap.clear();
beliefIdsWithMdpState.clear();
beliefIdsWithMdpState.grow(beliefManager->getNumberOfBeliefIds(), false);
lowerValueBounds.clear();
upperValueBounds.clear();
values.clear();
mdpTransitionsBuilder = storm::storage::SparseMatrixBuilder<ValueType>(0, 0, 0, true, true);
currentRowCount = 0;
startOfCurrentRowGroup = 0;
mdpActionRewards.clear();
exploredMdp = nullptr;
// Add some states with special treatment (if requested)
if (extraBottomStateValue) {
extraBottomState = getCurrentNumberOfMdpStates();
mdpStateToBeliefIdMap.push_back(beliefManager->noId());
insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get());
startOfCurrentRowGroup = currentRowCount;
mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
mdpTransitionsBuilder.addNextValue(currentRowCount, extraBottomState.get(), storm::utility::one<ValueType>());
++currentRowCount;
} else {
extraBottomState = boost::none;
}
if (extraTargetStateValue) {
extraTargetState = getCurrentNumberOfMdpStates();
mdpStateToBeliefIdMap.push_back(beliefManager->noId());
insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get());
startOfCurrentRowGroup = currentRowCount;
mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
mdpTransitionsBuilder.addNextValue(currentRowCount, extraTargetState.get(), storm::utility::one<ValueType>());
++currentRowCount;
targetStates.grow(getCurrentNumberOfMdpStates(), false);
targetStates.set(extraTargetState.get(), true);
} else {
extraTargetState = boost::none;
}
// Set up the initial state.
initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
}
bool hasUnexploredState() const {
return !beliefIdsToExplore.empty();
}
BeliefId exploreNextState() {
// Set up the matrix builder
finishCurrentRow();
startOfCurrentRowGroup = currentRowCount;
mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup);
++currentRowCount;
// Pop from the queue.
auto result = beliefIdsToExplore.front();
beliefIdsToExplore.pop_front();
return result;
}
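// Intended call sequence per state (mirroring the usage in ApproximatePOMDPModelchecker below):
// exploreNextState(), then addTransitionToBelief / addTransitionsToExtraStates / addSelfloopTransition
// for each local action, plus optionally setCurrentStateIsTarget() / setCurrentStateIsTruncated(),
// before the next exploreNextState() or finishExploration() closes the current row (group).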
void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero<ValueType>(), ValueType const& bottomStateValue = storm::utility::zero<ValueType>()) {
// We first insert the entries of the current row in a separate map.
// This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
uint64_t row = startOfCurrentRowGroup + localActionIndex;
if (!storm::utility::isZero(bottomStateValue)) {
STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none.");
internalAddTransition(row, extraBottomState.get(), bottomStateValue);
}
if (!storm::utility::isZero(targetStateValue)) {
STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none.");
internalAddTransition(row, extraTargetState.get(), targetStateValue);
}
}
void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one<ValueType>()) {
uint64_t row = startOfCurrentRowGroup + localActionIndex;
internalAddTransition(row, getCurrentMdpState(), value);
}
/*!
* Adds the next transition to the given successor belief
* @param localActionIndex
* @param transitionTarget
* @param value
* @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition.
* @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true.
*/
bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) {
// We first insert the entries of the current row in a separate map.
// This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
MdpStateType column;
if (ignoreNewBeliefs) {
column = getMdpState(transitionTarget);
if (column == noState()) {
return false;
}
} else {
column = getOrAddMdpState(transitionTarget);
}
uint64_t row = startOfCurrentRowGroup + localActionIndex;
internalAddTransition(row, column, value);
return true;
}
void computeRewardAtCurrentState(uint64_t const& localActionIndex, ValueType extraReward = storm::utility::zero<ValueType>()) {
if (currentRowCount >= mdpActionRewards.size()) {
mdpActionRewards.resize(currentRowCount, storm::utility::zero<ValueType>());
}
uint64_t row = startOfCurrentRowGroup + localActionIndex;
mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward;
}
void setCurrentStateIsTarget() {
targetStates.grow(getCurrentNumberOfMdpStates(), false);
targetStates.set(getCurrentMdpState(), true);
}
void setCurrentStateIsTruncated() {
truncatedStates.grow(getCurrentNumberOfMdpStates(), false);
truncatedStates.set(getCurrentMdpState(), true);
}
void finishExploration() {
// Create the transition matrix
finishCurrentRow();
auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates());
// Create a standard labeling
storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates());
mdpLabeling.addLabel("init");
mdpLabeling.addLabelToState("init", initialMdpState);
targetStates.resize(getCurrentNumberOfMdpStates(), false);
mdpLabeling.addLabel("target", std::move(targetStates));
truncatedStates.resize(getCurrentNumberOfMdpStates(), false);
mdpLabeling.addLabel("truncated", std::move(truncatedStates));
// Create a standard reward model (if rewards are available)
std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels;
if (!mdpActionRewards.empty()) {
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards)));
}
storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels));
exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
}
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const {
STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet.");
return exploredMdp;
}
MdpStateType getCurrentNumberOfMdpStates() const {
return mdpStateToBeliefIdMap.size();
}
MdpStateType getCurrentNumberOfMdpChoices() const {
return currentRowCount;
}
ValueType getLowerValueBoundAtCurrentState() const {
return lowerValueBounds[getCurrentMdpState()];
}
ValueType getUpperValueBoundAtCurrentState() const {
return upperValueBounds[getCurrentMdpState()];
}
ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const {
return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds);
}
ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const {
return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
}
std::vector<ValueType> const& computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) {
STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored");
auto property = createStandardProperty(dir, exploredMdp->hasRewardModel());
auto task = createStandardCheckTask(property);
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(exploredMdp, task));
if (res) {
values = std::move(res->asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
} else {
STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!");
STORM_LOG_ERROR("No result obtained while checking.");
}
return values;
}
ValueType const& getComputedValueAtInitialState() const {
STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored.");
return values[exploredMdp->getInitialStates().getNextSetIndex(0)];
}
private:
MdpStateType noState() const {
return std::numeric_limits<MdpStateType>::max();
}
std::shared_ptr<storm::logic::Formula const> createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) {
std::string propertyString = computeRewards ? "R" : "P";
propertyString += storm::solver::minimize(dir) ? "min" : "max";
propertyString += "=? [F \"target\"]";
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
return storm::api::extractFormulasFromProperties(propertyVector).front();
}
storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property) {
// Note: The property must not go out of scope as long as the task is in use, because the task only stores the property by reference.
// Therefore, this method takes the property by reference (and not by const reference).
auto task = storm::api::createTask<ValueType>(property, false);
auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
hint.setResultHint(values);
auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
task.setHint(hintPtr);
return task;
}
MdpStateType getCurrentMdpState() const {
return mdpTransitionsBuilder.getCurrentRowGroupCount() - 1;
}
MdpStateType getCurrentBeliefId() const {
return mdpStateToBeliefIdMap[getCurrentMdpState()];
}
void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) {
// We first insert the entries of the current row in a separate map.
// This is to ensure that entries are sorted in the right way (as required for the transition matrix builder)
STORM_LOG_ASSERT(row >= currentRowCount - 1, "Trying to insert in an already completed row.");
if (row >= currentRowCount) {
// We are going to start a new row, so insert the entries of the old one
finishCurrentRow();
currentRowCount = row + 1;
}
STORM_LOG_ASSERT(mdpTransitionsBuilderCurrentRowEntries.count(column) == 0, "Trying to insert multiple transitions to the same state.");
mdpTransitionsBuilderCurrentRowEntries[column] = value;
}
void finishCurrentRow() {
for (auto const& entry : mdpTransitionsBuilderCurrentRowEntries) {
mdpTransitionsBuilder.addNextValue(currentRowCount - 1, entry.first, entry.second);
}
mdpTransitionsBuilderCurrentRowEntries.clear();
}
MdpStateType getMdpState(BeliefId const& beliefId) const {
if (beliefId < beliefIdsWithMdpState.size() && beliefIdsWithMdpState.get(beliefId)) {
return beliefIdToMdpStateMap.at(beliefId);
} else {
return noState();
}
}
void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) {
lowerValueBounds.push_back(lowerBound);
upperValueBounds.push_back(upperBound);
// Take the middle value as a hint
values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType, uint64_t>(2));
STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have a different size than the number of available states.");
STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size.");
}
MdpStateType getOrAddMdpState(BeliefId const& beliefId) {
beliefIdsWithMdpState.grow(beliefId + 1, false);
if (beliefIdsWithMdpState.get(beliefId)) {
return beliefIdToMdpStateMap[beliefId];
} else {
// Add a new MDP state
beliefIdsWithMdpState.set(beliefId, true);
MdpStateType result = getCurrentNumberOfMdpStates();
assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size());
mdpStateToBeliefIdMap.push_back(beliefId);
beliefIdToMdpStateMap[beliefId] = result;
// This new belief needs exploration
beliefIdsToExplore.push_back(beliefId);
insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId));
return result;
}
}
// Belief state related information
std::shared_ptr<BeliefManagerType> beliefManager;
std::vector<BeliefId> mdpStateToBeliefIdMap;
std::map<BeliefId, MdpStateType> beliefIdToMdpStateMap;
storm::storage::BitVector beliefIdsWithMdpState;
// Exploration information
std::deque<uint64_t> beliefIdsToExplore;
storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder;
std::map<MdpStateType, ValueType> mdpTransitionsBuilderCurrentRowEntries;
std::vector<ValueType> mdpActionRewards;
uint64_t startOfCurrentRowGroup;
uint64_t currentRowCount;
// Special states during exploration
boost::optional<MdpStateType> extraTargetState;
boost::optional<MdpStateType> extraBottomState;
storm::storage::BitVector targetStates;
storm::storage::BitVector truncatedStates;
MdpStateType initialMdpState;
// Final Mdp
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
// Value related information
std::vector<ValueType> const& pomdpLowerValueBounds;
std::vector<ValueType> const& pomdpUpperValueBounds;
std::vector<ValueType> lowerValueBounds;
std::vector<ValueType> upperValueBounds;
std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has been performed
};
}
}
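
A note on internalAddTransition/finishCurrentRow above: entries of the current row are buffered in a std::map so that they reach the SparseMatrixBuilder with ascending column indices, which its addNextValue interface expects. A self-contained sketch of that pattern (standalone illustration, not Storm code):

#include <cassert>
#include <cstdint>
#include <iostream>
#include <map>

// Stand-in for a builder that requires entries of a row in ascending column order.
void addNextValueInOrder(uint64_t column, double value) {
    std::cout << column << " -> " << value << "\n";
}

int main() {
    std::map<uint64_t, double> currentRowEntries;  // std::map iterates keys in ascending order
    // Transitions may be discovered in arbitrary column order...
    currentRowEntries.emplace(7, 0.25);
    assert(currentRowEntries.count(2) == 0);       // no duplicate transitions to the same state
    currentRowEntries.emplace(2, 0.75);
    // ...but are flushed sorted, as the matrix builder expects.
    for (auto const& entry : currentRowEntries) {
        addNextValueInOrder(entry.first, entry.second);
    }
}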

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (248 changed lines)

@@ -22,7 +22,8 @@
#include "storm/api/properties.h"
#include "storm/api/export.h"
#include "storm-parsers/api/storm-parsers.h"
+#include "storm-pomdp/builder/BeliefMdpExplorer.h"
+#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"
#include "storm/utility/macros.h"
#include "storm/utility/SignalHandler.h"
@@ -57,6 +58,10 @@ namespace storm {
std::unique_ptr<POMDPCheckResult<ValueType>> result;
// Extract the relevant information from the formula
auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula);
+// Compute some initial bounds on the values for each state of the pomdp
+auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
if (formulaInfo.isNonNestedReachabilityProbability()) {
// FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
@@ -68,7 +73,7 @@ namespace storm {
if (options.doRefinement) {
result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
} else {
-result = computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
}
} else if (formulaInfo.isNonNestedExpectedRewardFormula()) {
// FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
@@ -78,7 +83,7 @@ namespace storm {
} else {
// FIXME: pick the non-unique reward model here
STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet.");
-result = computeReachabilityRewardOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
}
} else {
STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
@@ -233,8 +238,8 @@ namespace storm {
uint64_t refinementCounter = 1;
STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl)
std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
-initialOverApproxMap,
-initialUnderApproxMap, underApproxModelSize);
+{},
+{}, underApproxModelSize);
if (res == nullptr) {
statistics.refinementSteps = 0;
return nullptr;
@@ -335,14 +340,14 @@ namespace storm {
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
-std::vector<uint64_t> &observationResolutionVector,
bool computeRewards,
-boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
+std::vector<ValueType> const& lowerPomdpValueBounds,
+std::vector<ValueType> const& upperPomdpValueBounds,
uint64_t maxUaModelSize) {
STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
-auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, overApproximationMap,
-underApproximationMap, maxUaModelSize);
+std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
+auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, lowerPomdpValueBounds,
+upperPomdpValueBounds, maxUaModelSize);
if (result == nullptr) {
return nullptr;
}
@@ -353,8 +358,6 @@ namespace storm {
}
}
template <typename ValueType, typename BeliefType, typename SummandsType>
ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
ValueType result = storm::utility::zero<ValueType>();
@@ -369,155 +372,64 @@ namespace storm {
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
std::vector<uint64_t> &observationResolutionVector,
bool computeRewards,
-boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
-boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
+std::vector<ValueType> const& lowerPomdpValueBounds,
+std::vector<ValueType> const& upperPomdpValueBounds,
uint64_t maxUaModelSize) {
-bool boundMapsSet = overApproximationMap && underApproximationMap;
-std::map<uint64_t, ValueType> overMap;
-std::map<uint64_t, ValueType> underMap;
-if (boundMapsSet) {
-overMap = overApproximationMap.value();
-underMap = underApproximationMap.value();
-}
auto beliefManager = std::make_shared<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>>(pomdp, options.numericPrecision);
if (computeRewards) {
beliefManager->setRewardModel(); // TODO: get actual name
}
-bsmap_type beliefStateMap;
-std::deque<uint64_t> beliefsToBeExpanded;
statistics.overApproximationBuildTime.start();
-// Initial belief always has belief ID 0
-auto initialObservation = beliefManager->getBeliefObservation(beliefManager->getInitialBelief());
-// These are the components to build the MDP from the grid
-// Reserve states 0 and 1 as always sink/goal states
-storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
-uint64_t extraBottomState = 0;
-uint64_t extraTargetState = computeRewards ? 0 : 1;
-uint64_t nextMdpStateId = extraTargetState + 1;
-uint64_t mdpMatrixRow = 0;
-for (uint64_t state = 0; state < nextMdpStateId; ++state) {
-mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
-++mdpMatrixRow;
-}
-// Hint vector for the MDP modelchecker (initialize with constant sink/goal values)
-std::vector<ValueType> hintVector(nextMdpStateId, storm::utility::zero<ValueType>());
-if (!computeRewards) {
-hintVector[extraTargetState] = storm::utility::one<ValueType>();
-}
-std::vector<uint64_t> targetStates = {extraTargetState};
-storm::storage::BitVector fullyExpandedStates;
-// Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace
-std::map<uint64_t, ValueType> weightedSumOverMap;
-std::map<uint64_t, ValueType> weightedSumUnderMap;
-// for the initial belief, add the triangulated initial states
-auto triangulation = beliefManager->triangulateBelief(beliefManager->getInitialBelief(), observationResolutionVector[initialObservation]);
-uint64_t initialMdpState = nextMdpStateId;
-++nextMdpStateId;
-if (triangulation.size() == 1) {
-// The initial belief is on the grid itself
-auto initBeliefId = triangulation.gridPoints.front();
-if (boundMapsSet) {
-auto const& gridPoint = beliefManager->getBelief(initBeliefId);
-weightedSumOverMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, overMap);
-weightedSumUnderMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, underMap);
-}
-beliefsToBeExpanded.push_back(initBeliefId);
-beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState));
-hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one<ValueType>()
-: storm::utility::zero<ValueType>());
+storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+if (computeRewards) {
+explorer.startNewExploration(storm::utility::zero<ValueType>());
} else {
-// If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
-mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-for (uint64_t i = 0; i < triangulation.size(); ++i) {
-beliefsToBeExpanded.push_back(triangulation.gridPoints[i]);
-mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]);
-beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId));
-++nextMdpStateId;
-if (boundMapsSet) {
-auto const& gridPoint = beliefManager->getBelief(triangulation.gridPoints[i]);
-weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, overMap);
-weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, underMap);
-}
-hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one<ValueType>()
-: storm::utility::zero<ValueType>());
-}
-//beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
-++mdpMatrixRow;
+explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
}
// Expand the beliefs to generate the grid on-the-fly
if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
}
-storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false);
-for (auto const& belId : beliefsToBeExpanded) {
-foundBeliefs.set(belId, true);
-}
-while (!beliefsToBeExpanded.empty()) {
-uint64_t currId = beliefsToBeExpanded.front();
-beliefsToBeExpanded.pop_front();
+while (explorer.hasUnexploredState()) {
+uint64_t currId = explorer.exploreNextState();
-uint64_t currMdpState = beliefStateMap.left.at(currId);
uint32_t currObservation = beliefManager->getBeliefObservation(currId);
-mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
if (targetObservations.count(currObservation) != 0) {
-// Make this state absorbing
-targetStates.push_back(currMdpState);
-mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
-++mdpMatrixRow;
-} else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
-// TODO: with rewards we would have to assign the corresponding reward to this transition
-mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]);
-mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one<ValueType>() - weightedSumOverMap[currId]);
-++mdpMatrixRow;
+explorer.setCurrentStateIsTarget();
+explorer.addSelfloopTransition();
} else {
-fullyExpandedStates.grow(nextMdpStateId, false);
-fullyExpandedStates.set(currMdpState, true);
-uint64_t someState = beliefManager->getBelief(currId).begin()->first;
-uint64_t numChoices = pomdp.getNumberOfChoices(someState);
-for (uint64_t action = 0; action < numChoices; ++action) {
+bool stopExploration = false;
+if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
+stopExploration = true;
+explorer.setCurrentStateIsTruncated();
+}
+for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
+ValueType truncationProbability = storm::utility::zero<ValueType>();
+ValueType truncationValueBound = storm::utility::zero<ValueType>();
auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
-// Check for newly found grid points
-foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false);
for (auto const& successor : successorGridPoints) {
-auto successorId = successor.first;
-auto const& successorBelief = beliefManager->getBelief(successorId);
-auto successorObservation = beliefManager->getBeliefObservation(successorBelief);
-if (!foundBeliefs.get(successorId)) {
-foundBeliefs.set(successorId);
-beliefsToBeExpanded.push_back(successorId);
-beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
-++nextMdpStateId;
-if (boundMapsSet) {
-ValueType upperBound = getWeightedSum<ValueType>(successorBelief, overMap);
-ValueType lowerBound = getWeightedSum<ValueType>(successorBelief, underMap);
-if (cc.isEqual(upperBound, lowerBound)) {
-hintVector.push_back(lowerBound);
-} else {
-hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
-}
-weightedSumOverMap[successorId] = upperBound;
-weightedSumUnderMap[successorId] = lowerBound;
-} else {
-hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
-}
+bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
+if (!added) {
+STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
+// We did not explore this successor state. Get a bound on the "missing" value
+truncationProbability += successor.second;
+truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first));
}
-auto successorMdpState = beliefStateMap.left.at(successorId);
-// This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted.
-mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
}
-++mdpMatrixRow;
+if (stopExploration) {
+if (computeRewards) {
+explorer.addTransitionsToExtraStates(action, truncationProbability);
+} else {
+explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
+}
+}
+if (computeRewards) {
+// The truncationValueBound will be added on top of the reward introduced by the current belief state.
+explorer.computeRewardAtCurrentState(action, truncationValueBound);
+}
}
}
if (storm::utility::resources::isTerminate()) {
@@ -525,64 +437,30 @@ namespace storm {
break;
}
}
-statistics.overApproximationStates = nextMdpStateId;
-STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
+statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates();
if (storm::utility::resources::isTerminate()) {
statistics.overApproximationBuildTime.stop();
return nullptr;
}
-fullyExpandedStates.resize(nextMdpStateId, false);
-storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
-mdpLabeling.addLabel("init");
-mdpLabeling.addLabel("target");
-mdpLabeling.addLabelToState("init", initialMdpState);
-for (auto targetState : targetStates) {
-mdpLabeling.addLabelToState("target", targetState);
-}
-storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
-auto overApproxMdp = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
-if (computeRewards) {
-storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
-for (auto const &iter : beliefStateMap.left) {
-if (fullyExpandedStates.get(iter.second)) {
-auto const& currentBelief = beliefManager->getBelief(iter.first);
-auto representativeState = currentBelief.begin()->first;
-for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
-uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
-}
-}
-}
-overApproxMdp->addRewardModel("default", mdpRewardModel);
-}
+explorer.finishExploration();
statistics.overApproximationBuildTime.stop();
STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
-overApproxMdp->printModelInformationToStream(std::cout);
-auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(overApproxMdp);
-auto property = createStandardProperty(min, computeRewards);
-auto task = createStandardCheckTask(property, std::move(hintVector));
+explorer.getExploredMdp()->printModelInformationToStream(std::cout);
statistics.overApproximationCheckTime.start();
-std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(overApproxMdp, task));
+explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
statistics.overApproximationCheckTime.stop();
-if (storm::utility::resources::isTerminate() && !res) {
-return nullptr;
-}
-STORM_LOG_ASSERT(res, "Result does not exist.");
-res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true)));
-auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-auto overApprox = overApproxResultMap[initialMdpState];
STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl);
-STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
+STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
//auto underApprox = weightedSumUnderMap[initialBelief.id];
auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize);
if (storm::utility::resources::isTerminate() && !underApproxComponents) {
// TODO: return other components needed for refinement.
//return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
-return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
+//return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
}
STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
@@ -592,8 +470,8 @@ namespace storm {
underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
*/
-return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
-underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
+return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {},
+underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
}
@@ -930,14 +808,14 @@ namespace storm {
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min) {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
+// return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min) {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
+// return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
}
@@ -1191,7 +1069,7 @@ namespace storm {
auto representativeState = currentBelief.begin()->first;
for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
+mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(iter.first, action));
}
}
}
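
The core of the rewritten exploration loop is the truncation scheme: once the gap between the upper and lower value bound at the current belief drops below options.explorationThreshold, unexplored successors are not added to the MDP; their probability mass is instead rerouted to the extra target/bottom states so that the computed value remains a valid bound. Condensed from the fragments above into one piece (a sketch for the probability case; explorer, action, min, and successorGridPoints as in that loop):

// Sketch, condensed from computeFirstRefinementStep above (probability case).
ValueType truncationProbability = storm::utility::zero<ValueType>();
ValueType truncationValueBound = storm::utility::zero<ValueType>();
for (auto const& successor : successorGridPoints) {
    // With ignoreNewBeliefs == true, unknown successor beliefs are skipped and 'added' is false.
    bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, true);
    if (!added) {
        truncationProbability += successor.second;
        // Use the bound that is safe for the optimization direction.
        truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first)
                                                        : explorer.computeUpperValueBoundAtBelief(successor.first));
    }
}
// Route value-bound mass to the extra target state and the remainder to the bottom state.
explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);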

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (9 changed lines)

@@ -117,8 +117,7 @@ namespace storm {
*/
std::shared_ptr<RefinementComponents<ValueType>>
computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
-bool computeRewards, boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
+bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
std::shared_ptr<RefinementComponents<ValueType>>
computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
@@ -140,10 +139,8 @@ namespace storm {
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/
std::unique_ptr<POMDPCheckResult<ValueType>>
-computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
-std::vector<uint64_t> &observationResolutionVector, bool computeRewards,
-boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
-boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
+computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards,
+std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
/**
* Helper to compute an underapproximation of the reachability property.

src/storm-pomdp/storage/BeliefManager.h (24 changed lines)

@@ -45,6 +45,8 @@ namespace storm {
};
BeliefType const& getBelief(BeliefId const& id) const {
+STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existent belief.");
+STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range.");
return beliefs[id];
}
@@ -54,6 +56,10 @@ namespace storm {
return idIt->second;
}
+BeliefId noId() const {
+return std::numeric_limits<BeliefId>::max();
+}
std::string toString(BeliefType const& belief) const {
std::stringstream str;
str << "{ ";
@@ -180,11 +186,22 @@ namespace storm {
return true;
}
+template <typename SummandsType>
+ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) {
+ValueType result = storm::utility::zero<ValueType>();
+for (auto const& entry : getBelief(beliefId)) {
+result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
+}
+return result;
+}
BeliefId const& getInitialBelief() const {
return initialBeliefId;
}
-ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const {
+ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const {
+auto const& belief = getBelief(beliefId);
STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified.");
auto result = storm::utility::zero<ValueType>();
auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
@@ -206,6 +223,11 @@ namespace storm {
return getBeliefObservation(getBelief(beliefId));
}
+uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
+auto belief = getBelief(beliefId);
+return pomdp.getNumberOfChoices(belief.begin()->first);
+}
Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
//TODO this can also be simplified using the sparse vector interpretation
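
The new getWeightedSum helper is what the explorer's computeLowerValueBoundAtBelief / computeUpperValueBoundAtBelief delegate to: it evaluates a per-state value vector at a belief. A short usage sketch (beliefManager and pomdpUpperValueBounds are assumed to be set up by the caller, as in computeFirstRefinementStep):

// Sketch: evaluate a per-POMDP-state bound vector at a belief.
auto beliefId = beliefManager->getInitialBelief();
ValueType upperBoundAtInitialBelief = beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);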
