Browse Source

Renamed BeliefGrid to BeliefManager

tempestpy_adaptions
Tim Quatmann 5 years ago
parent
commit
0b552e6813
  1. 69
      src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
  2. 4
      src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
  3. 89
      src/storm-pomdp/storage/BeliefManager.h

69
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp

@ -380,9 +380,9 @@ namespace storm {
underMap = underApproximationMap.value(); underMap = underApproximationMap.value();
} }
storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>> beliefGrid(pomdp, options.numericPrecision);
auto beliefManager = std::make_shared<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>>(pomdp, options.numericPrecision);
if (computeRewards) { if (computeRewards) {
beliefGrid.setRewardModel();
beliefManager->setRewardModel(); // TODO: get actual name
} }
bsmap_type beliefStateMap; bsmap_type beliefStateMap;
@ -391,9 +391,7 @@ namespace storm {
statistics.overApproximationBuildTime.start(); statistics.overApproximationBuildTime.start();
// Initial belief always has belief ID 0 // Initial belief always has belief ID 0
auto initialBeliefId = beliefGrid.getInitialBelief();
auto const& initialBelief = beliefGrid.getGridPoint(initialBeliefId);
auto initialObservation = beliefGrid.getBeliefObservation(initialBelief);
auto initialObservation = beliefManager->getBeliefObservation(beliefManager->getInitialBelief());
// These are the components to build the MDP from the grid // These are the components to build the MDP from the grid
// Reserve states 0 and 1 as always sink/goal states // Reserve states 0 and 1 as always sink/goal states
storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true); storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
@ -419,14 +417,14 @@ namespace storm {
std::map<uint64_t, ValueType> weightedSumUnderMap; std::map<uint64_t, ValueType> weightedSumUnderMap;
// for the initial belief, add the triangulated initial states // for the initial belief, add the triangulated initial states
auto triangulation = beliefGrid.triangulateBelief(initialBelief, observationResolutionVector[initialObservation]);
auto triangulation = beliefManager->triangulateBelief(beliefManager->getInitialBelief(), observationResolutionVector[initialObservation]);
uint64_t initialMdpState = nextMdpStateId; uint64_t initialMdpState = nextMdpStateId;
++nextMdpStateId; ++nextMdpStateId;
if (triangulation.size() == 1) { if (triangulation.size() == 1) {
// The initial belief is on the grid itself // The initial belief is on the grid itself
auto initBeliefId = triangulation.gridPoints.front(); auto initBeliefId = triangulation.gridPoints.front();
if (boundMapsSet) { if (boundMapsSet) {
auto const& gridPoint = beliefGrid.getGridPoint(initBeliefId);
auto const& gridPoint = beliefManager->getBelief(initBeliefId);
weightedSumOverMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, overMap); weightedSumOverMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, overMap);
weightedSumUnderMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, underMap); weightedSumUnderMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, underMap);
} }
@ -443,7 +441,7 @@ namespace storm {
beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId)); beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId));
++nextMdpStateId; ++nextMdpStateId;
if (boundMapsSet) { if (boundMapsSet) {
auto const& gridPoint = beliefGrid.getGridPoint(triangulation.gridPoints[i]);
auto const& gridPoint = beliefManager->getBelief(triangulation.gridPoints[i]);
weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, overMap); weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, overMap);
weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, underMap); weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, underMap);
} }
@ -458,7 +456,7 @@ namespace storm {
if (options.explorationThreshold > storm::utility::zero<ValueType>()) { if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
} }
storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false);
storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false);
for (auto const& belId : beliefsToBeExpanded) { for (auto const& belId : beliefsToBeExpanded) {
foundBeliefs.set(belId, true); foundBeliefs.set(belId, true);
} }
@ -467,7 +465,7 @@ namespace storm {
beliefsToBeExpanded.pop_front(); beliefsToBeExpanded.pop_front();
uint64_t currMdpState = beliefStateMap.left.at(currId); uint64_t currMdpState = beliefStateMap.left.at(currId);
uint32_t currObservation = beliefGrid.getBeliefObservation(currId);
uint32_t currObservation = beliefManager->getBeliefObservation(currId);
mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
@ -484,17 +482,17 @@ namespace storm {
} else { } else {
fullyExpandedStates.grow(nextMdpStateId, false); fullyExpandedStates.grow(nextMdpStateId, false);
fullyExpandedStates.set(currMdpState, true); fullyExpandedStates.set(currMdpState, true);
uint64_t someState = beliefGrid.getGridPoint(currId).begin()->first;
uint64_t someState = beliefManager->getBelief(currId).begin()->first;
uint64_t numChoices = pomdp.getNumberOfChoices(someState); uint64_t numChoices = pomdp.getNumberOfChoices(someState);
for (uint64_t action = 0; action < numChoices; ++action) { for (uint64_t action = 0; action < numChoices; ++action) {
auto successorGridPoints = beliefGrid.expandAndTriangulate(currId, action, observationResolutionVector);
auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
// Check for newly found grid points // Check for newly found grid points
foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false);
foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false);
for (auto const& successor : successorGridPoints) { for (auto const& successor : successorGridPoints) {
auto successorId = successor.first; auto successorId = successor.first;
auto const& successorBelief = beliefGrid.getGridPoint(successorId);
auto successorObservation = beliefGrid.getBeliefObservation(successorBelief);
auto const& successorBelief = beliefManager->getBelief(successorId);
auto successorObservation = beliefManager->getBeliefObservation(successorBelief);
if (!foundBeliefs.get(successorId)) { if (!foundBeliefs.get(successorId)) {
foundBeliefs.set(successorId); foundBeliefs.set(successorId);
beliefsToBeExpanded.push_back(successorId); beliefsToBeExpanded.push_back(successorId);
@ -548,11 +546,11 @@ namespace storm {
storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>())); storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
for (auto const &iter : beliefStateMap.left) { for (auto const &iter : beliefStateMap.left) {
if (fullyExpandedStates.get(iter.second)) { if (fullyExpandedStates.get(iter.second)) {
auto const& currentBelief = beliefGrid.getGridPoint(iter.first);
auto const& currentBelief = beliefManager->getBelief(iter.first);
auto representativeState = currentBelief.begin()->first; auto representativeState = currentBelief.begin()->first;
for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action));
mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
} }
} }
} }
@ -575,16 +573,16 @@ namespace storm {
STORM_LOG_ASSERT(res, "Result does not exist."); STORM_LOG_ASSERT(res, "Result does not exist.");
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true))); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true)));
auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap(); auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBeliefId)];
auto overApprox = overApproxResultMap[initialMdpState];
STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl); STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl);
STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
//auto underApprox = weightedSumUnderMap[initialBelief.id]; //auto underApprox = weightedSumUnderMap[initialBelief.id];
auto underApproxComponents = computeUnderapproximation(beliefGrid, targetObservations, min, computeRewards, maxUaModelSize);
auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize);
if (storm::utility::resources::isTerminate() && !underApproxComponents) { if (storm::utility::resources::isTerminate() && !underApproxComponents) {
// TODO: return other components needed for refinement. // TODO: return other components needed for refinement.
//return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, initialBeliefId});
return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
} }
STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
@ -595,7 +593,7 @@ namespace storm {
underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
*/ */
return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, initialBeliefId});
underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
} }
@ -1075,7 +1073,7 @@ namespace storm {
template<typename ValueType, typename RewardModelType> template<typename ValueType, typename RewardModelType>
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>>& beliefGrid,
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
std::set<uint32_t> const &targetObservations, bool min, std::set<uint32_t> const &targetObservations, bool min,
bool computeRewards, uint64_t maxModelSize) { bool computeRewards, uint64_t maxModelSize) {
// Build the belief MDP until enough states are explored. // Build the belief MDP until enough states are explored.
@ -1100,13 +1098,12 @@ namespace storm {
bsmap_type beliefStateMap; bsmap_type beliefStateMap;
std::deque<uint64_t> beliefsToBeExpanded; std::deque<uint64_t> beliefsToBeExpanded;
auto initialBeliefId = beliefGrid.getInitialBelief();
beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, nextMdpStateId));
beliefsToBeExpanded.push_back(initialBeliefId);
beliefStateMap.insert(bsmap_type::value_type(beliefManager->getInitialBelief(), nextMdpStateId));
beliefsToBeExpanded.push_back(beliefManager->getInitialBelief());
++nextMdpStateId; ++nextMdpStateId;
// Expand the believes // Expand the believes
storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false);
storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false);
for (auto const& belId : beliefsToBeExpanded) { for (auto const& belId : beliefsToBeExpanded) {
foundBeliefs.set(belId, true); foundBeliefs.set(belId, true);
} }
@ -1115,8 +1112,8 @@ namespace storm {
beliefsToBeExpanded.pop_front(); beliefsToBeExpanded.pop_front();
uint64_t currMdpState = beliefStateMap.left.at(currId); uint64_t currMdpState = beliefStateMap.left.at(currId);
auto const& currBelief = beliefGrid.getGridPoint(currId);
uint32_t currObservation = beliefGrid.getBeliefObservation(currBelief);
auto const& currBelief = beliefManager->getBelief(currId);
uint32_t currObservation = beliefManager->getBeliefObservation(currBelief);
mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
@ -1147,9 +1144,9 @@ namespace storm {
uint64_t someState = currBelief.begin()->first; uint64_t someState = currBelief.begin()->first;
uint64_t numChoices = pomdp.getNumberOfChoices(someState); uint64_t numChoices = pomdp.getNumberOfChoices(someState);
for (uint64_t action = 0; action < numChoices; ++action) { for (uint64_t action = 0; action < numChoices; ++action) {
auto successorBeliefs = beliefGrid.expand(currId, action);
auto successorBeliefs = beliefManager->expand(currId, action);
// Check for newly found beliefs // Check for newly found beliefs
foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false);
foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false);
for (auto const& successor : successorBeliefs) { for (auto const& successor : successorBeliefs) {
auto successorId = successor.first; auto successorId = successor.first;
if (!foundBeliefs.get(successorId)) { if (!foundBeliefs.get(successorId)) {
@ -1159,7 +1156,7 @@ namespace storm {
++nextMdpStateId; ++nextMdpStateId;
} }
auto successorMdpState = beliefStateMap.left.at(successorId); auto successorMdpState = beliefStateMap.left.at(successorId);
// This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted.
// This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorBeliefs are sorted.
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second); mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
} }
++mdpMatrixRow; ++mdpMatrixRow;
@ -1179,7 +1176,7 @@ namespace storm {
storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
mdpLabeling.addLabel("init"); mdpLabeling.addLabel("init");
mdpLabeling.addLabel("target"); mdpLabeling.addLabel("target");
mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBeliefId));
mdpLabeling.addLabelToState("init", beliefStateMap.left.at(beliefManager->getInitialBelief()));
for (auto targetState : targetStates) { for (auto targetState : targetStates) {
mdpLabeling.addLabelToState("target", targetState); mdpLabeling.addLabelToState("target", targetState);
} }
@ -1190,11 +1187,11 @@ namespace storm {
storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>())); storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
for (auto const &iter : beliefStateMap.left) { for (auto const &iter : beliefStateMap.left) {
if (fullyExpandedStates.get(iter.second)) { if (fullyExpandedStates.get(iter.second)) {
auto const& currentBelief = beliefGrid.getGridPoint(iter.first);
auto const& currentBelief = beliefManager->getBelief(iter.first);
auto representativeState = currentBelief.begin()->first; auto representativeState = currentBelief.begin()->first;
for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action));
mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action));
} }
} }
} }
@ -1207,8 +1204,8 @@ namespace storm {
auto property = createStandardProperty(min, computeRewards); auto property = createStandardProperty(min, computeRewards);
auto task = createStandardCheckTask(property, std::vector<ValueType>()); auto task = createStandardCheckTask(property, std::vector<ValueType>());
statistics.underApproximationCheckTime.start();
statistics.underApproximationCheckTime.start();
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task)); std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
statistics.underApproximationCheckTime.stop(); statistics.underApproximationCheckTime.stop();
if (storm::utility::resources::isTerminate() && !res) { if (storm::utility::resources::isTerminate() && !res) {
@ -1217,7 +1214,7 @@ namespace storm {
STORM_LOG_ASSERT(res, "Result does not exist."); STORM_LOG_ASSERT(res, "Result does not exist.");
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true))); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true)));
auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap(); auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)];
auto underApprox = underApproxResultMap[beliefStateMap.left.at(beliefManager->getInitialBelief())];
return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap}); return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
} }

4
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h

@ -3,7 +3,7 @@
#include "storm/models/sparse/Pomdp.h" #include "storm/models/sparse/Pomdp.h"
#include "storm/utility/logging.h" #include "storm/utility/logging.h"
#include "storm-pomdp/storage/Belief.h" #include "storm-pomdp/storage/Belief.h"
#include "storm-pomdp/storage/BeliefGrid.h"
#include "storm-pomdp/storage/BeliefManager.h"
#include <boost/bimap.hpp> #include <boost/bimap.hpp>
#include "storm/storage/jani/Property.h" #include "storm/storage/jani/Property.h"
@ -163,7 +163,7 @@ namespace storm {
std::set<uint32_t> const &targetObservations, std::set<uint32_t> const &targetObservations,
uint64_t initialBeliefId, bool min, bool computeReward, uint64_t initialBeliefId, bool min, bool computeReward,
uint64_t maxModelSize); uint64_t maxModelSize);
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>>& beliefGrid,
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
std::set<uint32_t> const &targetObservations, bool min, bool computeReward, std::set<uint32_t> const &targetObservations, bool min, bool computeReward,
uint64_t maxModelSize); uint64_t maxModelSize);

89
src/storm-pomdp/storage/BeliefGrid.h → src/storm-pomdp/storage/BeliefManager.h

@ -11,8 +11,7 @@ namespace storm {
namespace storage { namespace storage {
template <typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType, typename StateType = uint64_t> template <typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType, typename StateType = uint64_t>
// TODO: Change name. This actually does not only manage grid points.
class BeliefGrid {
class BeliefManager {
public: public:
typedef typename PomdpType::ValueType ValueType; typedef typename PomdpType::ValueType ValueType;
@ -20,8 +19,8 @@ namespace storm {
typedef std::map<StateType, BeliefValueType> BeliefType; typedef std::map<StateType, BeliefValueType> BeliefType;
typedef uint64_t BeliefId; typedef uint64_t BeliefId;
BeliefGrid(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
// Intentionally left empty
BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
initialBeliefId = computeInitialBelief();
} }
void setRewardModel(boost::optional<std::string> rewardModelName = boost::none) { void setRewardModel(boost::optional<std::string> rewardModelName = boost::none) {
@ -45,13 +44,13 @@ namespace storm {
} }
}; };
BeliefType const& getGridPoint(BeliefId const& id) const {
return gridPoints[id];
BeliefType const& getBelief(BeliefId const& id) const {
return beliefs[id];
} }
BeliefId getIdOfGridPoint(BeliefType const& gridPoint) const {
auto idIt = gridPointToIdMap.find(gridPoint);
STORM_LOG_THROW(idIt != gridPointToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown grid state.");
BeliefId getId(BeliefType const& belief) const {
auto idIt = beliefToIdMap.find(belief);
STORM_LOG_THROW(idIt != beliefToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown Belief.");
return idIt->second; return idIt->second;
} }
@ -75,7 +74,7 @@ namespace storm {
std::stringstream str; std::stringstream str;
str << "(\n"; str << "(\n";
for (uint64_t i = 0; i < t.size(); ++i) { for (uint64_t i = 0; i < t.size(); ++i) {
str << "\t" << t.weights[i] << " * \t" << toString(getGridPoint(t.gridPoints[i])) << "\n";
str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n";
} }
str <<")\n"; str <<")\n";
return str.str(); return str.str();
@ -161,7 +160,7 @@ namespace storm {
STORM_LOG_ERROR("Weight greater than one in triangulation."); STORM_LOG_ERROR("Weight greater than one in triangulation.");
} }
weightSum += triangulation.weights[i]; weightSum += triangulation.weights[i];
BeliefType const& gridPoint = getGridPoint(triangulation.gridPoints[i]);
BeliefType const& gridPoint = getBelief(triangulation.gridPoints[i]);
for (auto const& pointEntry : gridPoint) { for (auto const& pointEntry : gridPoint) {
BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero<ValueType>()).first->second; BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero<ValueType>()).first->second;
triangulatedValue += triangulation.weights[i] * pointEntry.second; triangulatedValue += triangulation.weights[i] * pointEntry.second;
@ -181,16 +180,8 @@ namespace storm {
return true; return true;
} }
BeliefId getInitialBelief() {
STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
"POMDP contains more than one initial state");
STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
"POMDP does not contain an initial state");
BeliefType belief;
belief[*pomdp.getInitialStates().begin()] = storm::utility::one<ValueType>();
STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief.");
return getOrAddGridPointId(belief);
BeliefId const& getInitialBelief() const {
return initialBeliefId;
} }
ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const { ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const {
@ -212,7 +203,7 @@ namespace storm {
} }
uint32_t getBeliefObservation(BeliefId beliefId) { uint32_t getBeliefObservation(BeliefId beliefId) {
return getBeliefObservation(getGridPoint(beliefId));
return getBeliefObservation(getBelief(beliefId));
} }
@ -286,7 +277,7 @@ namespace storm {
if (!cc.isZero(qsj[nrStates - 1])) { if (!cc.isZero(qsj[nrStates - 1])) {
gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution; gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution;
} }
result.gridPoints.push_back(getOrAddGridPointId(gridPoint));
result.gridPoints.push_back(getOrAddBeliefId(gridPoint));
} }
} }
@ -294,6 +285,10 @@ namespace storm {
return result; return result;
} }
Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) {
return triangulateBelief(getBelief(beliefId), resolution);
}
template<typename DistributionType> template<typename DistributionType>
void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) {
auto insertionRes = distr.emplace(state, value); auto insertionRes = distr.emplace(state, value);
@ -302,19 +297,19 @@ namespace storm {
} }
} }
BeliefId getNumberOfGridPointIds() const {
return gridPoints.size();
BeliefId getNumberOfBeliefIds() const {
return beliefs.size();
} }
std::map<BeliefId, ValueType> expandInternal(BeliefId const& gridPointId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
std::map<BeliefId, ValueType> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
std::map<BeliefId, ValueType> destinations; // The belief ids should be ordered std::map<BeliefId, ValueType> destinations; // The belief ids should be ordered
// TODO: Does this make sense? It could be better to order them afterwards because now we rely on the fact that MDP states have the same order than their associated BeliefIds // TODO: Does this make sense? It could be better to order them afterwards because now we rely on the fact that MDP states have the same order than their associated BeliefIds
BeliefType gridPoint = getGridPoint(gridPointId);
BeliefType belief = getBelief(beliefId);
// Find the probability we go to each observation // Find the probability we go to each observation
BeliefType successorObs; // This is actually not a belief but has the same type BeliefType successorObs; // This is actually not a belief but has the same type
for (auto const& pointEntry : gridPoint) {
for (auto const& pointEntry : belief) {
uint64_t state = pointEntry.first; uint64_t state = pointEntry.first;
for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
if (!storm::utility::isZero(pomdpTransition.getValue())) { if (!storm::utility::isZero(pomdpTransition.getValue())) {
@ -327,7 +322,7 @@ namespace storm {
// Now for each successor observation we find and potentially triangulate the successor belief // Now for each successor observation we find and potentially triangulate the successor belief
for (auto const& successor : successorObs) { for (auto const& successor : successorObs) {
BeliefType successorBelief; BeliefType successorBelief;
for (auto const& pointEntry : gridPoint) {
for (auto const& pointEntry : belief) {
uint64_t state = pointEntry.first; uint64_t state = pointEntry.first;
for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) { if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) {
@ -344,7 +339,7 @@ namespace storm {
addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second); addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
} }
} else { } else {
addToDistribution(destinations, getOrAddGridPointId(successorBelief), successor.second);
addToDistribution(destinations, getOrAddBeliefId(successorBelief), successor.second);
} }
} }
@ -352,21 +347,33 @@ namespace storm {
} }
std::map<BeliefId, ValueType> expandAndTriangulate(BeliefId const& gridPointId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
return expandInternal(gridPointId, actionIndex, observationResolutions);
std::map<BeliefId, ValueType> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
return expandInternal(beliefId, actionIndex, observationResolutions);
} }
std::map<BeliefId, ValueType> expand(BeliefId const& gridPointId, uint64_t actionIndex) {
return expandInternal(gridPointId, actionIndex);
std::map<BeliefId, ValueType> expand(BeliefId const& beliefId, uint64_t actionIndex) {
return expandInternal(beliefId, actionIndex);
} }
private: private:
BeliefId getOrAddGridPointId(BeliefType const& gridPoint) {
auto insertioRes = gridPointToIdMap.emplace(gridPoint, gridPoints.size());
BeliefId computeInitialBelief() {
STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
"POMDP contains more than one initial state");
STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
"POMDP does not contain an initial state");
BeliefType belief;
belief[*pomdp.getInitialStates().begin()] = storm::utility::one<ValueType>();
STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief.");
return getOrAddBeliefId(belief);
}
BeliefId getOrAddBeliefId(BeliefType const& belief) {
auto insertioRes = beliefToIdMap.emplace(belief, beliefs.size());
if (insertioRes.second) { if (insertioRes.second) {
// There actually was an insertion, so add the new grid state
gridPoints.push_back(gridPoint);
// There actually was an insertion, so add the new belief
beliefs.push_back(belief);
} }
// Return the id // Return the id
return insertioRes.first->second; return insertioRes.first->second;
@ -375,8 +382,10 @@ namespace storm {
PomdpType const& pomdp; PomdpType const& pomdp;
std::vector<ValueType> pomdpActionRewardVector; std::vector<ValueType> pomdpActionRewardVector;
std::vector<BeliefType> gridPoints;
std::map<BeliefType, BeliefId> gridPointToIdMap;
std::vector<BeliefType> beliefs;
std::map<BeliefType, BeliefId> beliefToIdMap;
BeliefId initialBeliefId;
storm::utility::ConstantsComparator<ValueType> cc; storm::utility::ConstantsComparator<ValueType> cc;
Loading…
Cancel
Save