Browse Source

Use boost::bimap for the belief space <-> state space mapping

tempestpy_adaptions
Alexander Bork 5 years ago
parent
commit
8d3254b8d0
  1. 38
      src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
  2. 5
      src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h

38
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp

@@ -2,7 +2,6 @@
#include <boost/algorithm/string.hpp> #include <boost/algorithm/string.hpp>
#include "storm/utility/ConstantsComparator.h" #include "storm/utility/ConstantsComparator.h"
#include "storm/models/sparse/Dtmc.h" #include "storm/models/sparse/Dtmc.h"
#include "storm/models/sparse/StandardRewardModel.h" #include "storm/models/sparse/StandardRewardModel.h"
@@ -105,7 +104,7 @@ namespace storm {
boost::optional<std::map<uint64_t, ValueType>> overApproximationMap, boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap) { boost::optional<std::map<uint64_t, ValueType>> underApproximationMap) {
STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
auto result = computeRefinementFirstStep(pomdp, targetObservations, min, observationResolutionVector, computeRewards, explorationThreshold, overApproximationMap,
auto result = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, computeRewards, explorationThreshold, overApproximationMap,
underApproximationMap); underApproximationMap);
return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->overApproxValue, result->underApproxValue}); return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->overApproxValue, result->underApproxValue});
} }
@@ -113,7 +112,7 @@ namespace storm {
template<typename ValueType, typename RewardModelType> template<typename ValueType, typename RewardModelType>
std::unique_ptr<RefinementComponents<ValueType>> std::unique_ptr<RefinementComponents<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeRefinementFirstStep(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::set<uint32_t> const &targetObservations, bool min, std::set<uint32_t> const &targetObservations, bool min,
std::vector<uint64_t> &observationResolutionVector, std::vector<uint64_t> &observationResolutionVector,
bool computeRewards, double explorationThreshold, bool computeRewards, double explorationThreshold,
@@ -133,7 +132,7 @@ namespace storm {
//Use caching to avoid multiple computation of the subsimplices and lambdas //Use caching to avoid multiple computation of the subsimplices and lambdas
std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache; std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
std::map<uint64_t, std::vector<ValueType>> lambdaCache; std::map<uint64_t, std::vector<ValueType>> lambdaCache;
std::map<uint64_t, uint64_t> beliefStateMap;
bsmap_type beliefStateMap;
std::deque<uint64_t> beliefsToBeExpanded; std::deque<uint64_t> beliefsToBeExpanded;
@@ -151,8 +150,7 @@ namespace storm {
beliefList.push_back(initialBelief); beliefList.push_back(initialBelief);
beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
// These are the components to build the MDP from the grid TODO make a better structure to allow for fast reverse lookups (state-->belief) as it is a bijective function (boost:bimap?)
// These are the components to build the MDP from the grid
// Reserve states 0 and 1 as always sink/goal states // Reserve states 0 and 1 as always sink/goal states
std::vector<std::vector<std::map<uint64_t, ValueType>>> mdpTransitions = {{{{0, storm::utility::one<ValueType>()}}}, std::vector<std::vector<std::map<uint64_t, ValueType>>> mdpTransitions = {{{{0, storm::utility::one<ValueType>()}}},
{{{1, storm::utility::one<ValueType>()}}}}; {{{1, storm::utility::one<ValueType>()}}}};
@@ -161,7 +159,7 @@ namespace storm {
std::vector<uint64_t> targetStates = {1}; std::vector<uint64_t> targetStates = {1};
uint64_t mdpStateId = 2; uint64_t mdpStateId = 2;
beliefStateMap[initialBelief.id] = mdpStateId;
beliefStateMap.insert(bsmap_type::value_type(initialBelief.id, mdpStateId));
++mdpStateId; ++mdpStateId;
// Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace
@@ -173,7 +171,7 @@ namespace storm {
observationResolutionVector[initialBelief.observation]); observationResolutionVector[initialBelief.observation]);
std::vector<std::vector<ValueType>> initSubSimplex = initTemp.first; std::vector<std::vector<ValueType>> initSubSimplex = initTemp.first;
std::vector<ValueType> initLambdas = initTemp.second; std::vector<ValueType> initLambdas = initTemp.second;
if(cacheSubsimplices){
if (cacheSubsimplices) {
subSimplexCache[0] = initSubSimplex; subSimplexCache[0] = initSubSimplex;
lambdaCache[0] = initLambdas; lambdaCache[0] = initLambdas;
} }
@@ -225,7 +223,7 @@ namespace storm {
hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>() hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
: storm::utility::zero<ValueType>()); : storm::utility::zero<ValueType>());
beliefStateMap[nextId] = mdpStateId;
beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId));
initTransitionInActionBelief[mdpStateId] = initLambdas[j]; initTransitionInActionBelief[mdpStateId] = initLambdas[j];
++nextId; ++nextId;
++mdpStateId; ++mdpStateId;
@@ -261,9 +259,9 @@ namespace storm {
// Depending on whether we compute rewards, we select the right initial result // Depending on whether we compute rewards, we select the right initial result
// MDP stuff // MDP stuff
std::vector<std::map<uint64_t, ValueType>> transitionsInBelief; std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
targetStates.push_back(beliefStateMap[currId]);
targetStates.push_back(beliefStateMap.left.at(currId));
std::map<uint64_t, ValueType> transitionInActionBelief; std::map<uint64_t, ValueType> transitionInActionBelief;
transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one<ValueType>();
transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one<ValueType>();
transitionsInBelief.push_back(transitionInActionBelief); transitionsInBelief.push_back(transitionInActionBelief);
mdpTransitions.push_back(transitionsInBelief); mdpTransitions.push_back(transitionsInBelief);
} else { } else {
@@ -332,12 +330,13 @@ namespace storm {
: storm::utility::zero<ValueType>()); : storm::utility::zero<ValueType>());
} }
beliefsToBeExpanded.push_back(nextId); beliefsToBeExpanded.push_back(nextId);
beliefStateMap[nextId] = mdpStateId;
beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId));
transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; transitionInActionBelief[mdpStateId] = iter->second * lambdas[j];
++nextId; ++nextId;
++mdpStateId; ++mdpStateId;
} else { } else {
transitionInActionBelief[beliefStateMap[getBeliefIdInVector(beliefGrid, observation, subSimplex[j])]] = iter->second * lambdas[j];
transitionInActionBelief[beliefStateMap.left.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j]))] =
iter->second * lambdas[j];
} }
} }
} }
@@ -361,7 +360,7 @@ namespace storm {
if (transitionsInBelief.empty()) { if (transitionsInBelief.empty()) {
std::map<uint64_t, ValueType> transitionInActionBelief; std::map<uint64_t, ValueType> transitionInActionBelief;
transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one<ValueType>();
transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one<ValueType>();
transitionsInBelief.push_back(transitionInActionBelief); transitionsInBelief.push_back(transitionInActionBelief);
} }
mdpTransitions.push_back(transitionsInBelief); mdpTransitions.push_back(transitionsInBelief);
@@ -374,7 +373,7 @@ namespace storm {
storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size());
mdpLabeling.addLabel("init"); mdpLabeling.addLabel("init");
mdpLabeling.addLabel("target"); mdpLabeling.addLabel("target");
mdpLabeling.addLabelToState("init", beliefStateMap[initialBelief.id]);
mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBelief.id));
for (auto targetState : targetStates) { for (auto targetState : targetStates) {
mdpLabeling.addLabelToState("target", targetState); mdpLabeling.addLabelToState("target", targetState);
} }
@@ -382,7 +381,7 @@ namespace storm {
storm::models::sparse::Mdp<ValueType, RewardModelType> overApproxMdp(modelComponents); storm::models::sparse::Mdp<ValueType, RewardModelType> overApproxMdp(modelComponents);
if (computeRewards) { if (computeRewards) {
storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount())); storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
for (auto const &iter : beliefStateMap) {
for (auto const &iter : beliefStateMap.left) {
auto currentBelief = beliefList[iter.first]; auto currentBelief = beliefList[iter.first];
auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) {
@@ -415,7 +414,7 @@ namespace storm {
STORM_LOG_ASSERT(res, "Result not exist."); STORM_LOG_ASSERT(res, "Result not exist.");
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true)));
auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap(); auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
auto overApprox = overApproxResultMap[beliefStateMap[initialBelief.id]];
auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)];
STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl)
auto underApprox = weightedSumUnderMap[initialBelief.id]; auto underApprox = weightedSumUnderMap[initialBelief.id];
@@ -423,9 +422,10 @@ namespace storm {
STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
// Transfer the underapproximation results from the belief id space to the MDP id space // Transfer the underapproximation results from the belief id space to the MDP id space
std::map<uint64_t, ValueType> underApproxResultMap;
std::map<uint64_t, ValueType> underApproxResultMap = {{0, storm::utility::zero<ValueType>()},
{1, storm::utility::one<ValueType>()}};
for (auto const &belief : beliefGrid) { for (auto const &belief : beliefGrid) {
underApproxResultMap[beliefStateMap[belief.id]] = weightedSumUnderMap[belief.id];
underApproxResultMap[beliefStateMap.left.at(belief.id)] = weightedSumUnderMap[belief.id];
} }
return std::make_unique<RefinementComponents<ValueType>>( return std::make_unique<RefinementComponents<ValueType>>(

5
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h

@@ -3,12 +3,15 @@
#include "storm/models/sparse/Pomdp.h" #include "storm/models/sparse/Pomdp.h"
#include "storm/utility/logging.h" #include "storm/utility/logging.h"
#include "storm-pomdp/storage/Belief.h" #include "storm-pomdp/storage/Belief.h"
#include <boost/bimap.hpp>
#include "storm/storage/jani/Property.h" #include "storm/storage/jani/Property.h"
namespace storm { namespace storm {
namespace pomdp { namespace pomdp {
namespace modelchecker { namespace modelchecker {
typedef boost::bimap<uint64_t, uint64_t> bsmap_type;
template<class ValueType> template<class ValueType>
struct POMDPCheckResult { struct POMDPCheckResult {
ValueType overApproxValue; ValueType overApproxValue;
@@ -73,7 +76,7 @@ namespace storm {
* @return * @return
*/ */
std::unique_ptr<RefinementComponents<ValueType>> std::unique_ptr<RefinementComponents<ValueType>>
computeRefinementFirstStep(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
computeFirstRefinementStep(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector, std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
bool computeRewards, double explorationThreshold, boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none, bool computeRewards, double explorationThreshold, boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none); boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none);

Loading…
Cancel
Save