From fd00d495f78a74a96903da9375d055bedc7c5e2c Mon Sep 17 00:00:00 2001
From: Sebastian Junges
Date: Mon, 24 Aug 2020 21:38:53 -0700
Subject: [PATCH] first version of the nondeterministicbelieftracker

---
 .../NondeterministicBeliefTracker.cpp         | 221 ++++++++++++++++++
 .../generator/NondeterministicBeliefTracker.h | 123 ++++++++++
 2 files changed, 344 insertions(+)
 create mode 100644 src/storm-pomdp/generator/NondeterministicBeliefTracker.cpp
 create mode 100644 src/storm-pomdp/generator/NondeterministicBeliefTracker.h

diff --git a/src/storm-pomdp/generator/NondeterministicBeliefTracker.cpp b/src/storm-pomdp/generator/NondeterministicBeliefTracker.cpp
new file mode 100644
index 000000000..c0d3bd3e8
--- /dev/null
+++ b/src/storm-pomdp/generator/NondeterministicBeliefTracker.cpp
@@ -0,0 +1,221 @@
+#include "storm-pomdp/generator/NondeterministicBeliefTracker.h"
+
+#include <cassert>
+#include <sstream>
+#include <utility>
+
+#include <boost/functional/hash.hpp>
+
+namespace storm {
+    namespace generator {
+
+        template<typename ValueType>
+        BeliefStateManager<ValueType>::BeliefStateManager(storm::models::sparse::Pomdp<ValueType> const& pomdp)
+        : pomdp(pomdp)
+        {
+            numberActionsPerObservation = std::vector<uint64_t>(pomdp.getNrObservations(), 0);
+            // Each state overwrites the entry of its observation, so the count of the last state with that
+            // observation is kept (presumably all states with the same observation have equally many choices).
+            for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
+                numberActionsPerObservation[pomdp.getObservation(state)] = pomdp.getNumberOfChoices(state);
+            }
+        }
+
+        template<typename ValueType>
+        uint64_t BeliefStateManager<ValueType>::getActionsForObservation(uint32_t observation) const {
+            return numberActionsPerObservation[observation];
+        }
+
+        template<typename ValueType>
+        ValueType BeliefStateManager<ValueType>::getRisk(uint64_t state) const {
+            // at() is range-checked: it throws if the risk vector has no entry for this state.
+            return riskPerState.at(state);
+        }
+
+        template<typename ValueType>
+        storm::models::sparse::Pomdp<ValueType> const& BeliefStateManager<ValueType>::getPomdp() const {
+            return pomdp;
+        }
+
+        template<typename ValueType>
+        void BeliefStateManager<ValueType>::setRiskPerState(std::vector<ValueType> const& risk) {
+            riskPerState = risk;
+        }
+
+        template<typename ValueType>
+        SparseBeliefState<ValueType>::SparseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, uint64_t state)
+        : manager(manager), belief()
+        {
+            ValueType const one = storm::utility::one<ValueType>();
+            belief[state] = one;
+            // Hash the single entry exactly like update() does. Otherwise this belief (hash 0) would never compare
+            // equal to the same Dirac belief produced by update(), because operator== also compares the hashes.
+            boost::hash_combine(prestoredhash, std::hash<ValueType>()(one));
+            boost::hash_combine(prestoredhash, state);
+            risk = manager->getRisk(state);
+        }
+
+        template<typename ValueType>
+        SparseBeliefState<ValueType>::SparseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, std::map<uint64_t, ValueType> const& belief,
+                                                       std::size_t hash, ValueType const& risk)
+        : manager(manager), belief(belief), prestoredhash(hash), risk(risk)
+        {
+            // Intentionally left empty
+        }
+
+        template<typename ValueType>
+        ValueType SparseBeliefState<ValueType>::get(uint64_t state) const {
+            return belief.at(state);
+        }
+
+        template<typename ValueType>
+        ValueType SparseBeliefState<ValueType>::getRisk() const {
+            return risk;
+        }
+
+        template<typename ValueType>
+        std::size_t SparseBeliefState<ValueType>::hash() const noexcept {
+            return prestoredhash;
+        }
+
+        template<typename ValueType>
+        bool SparseBeliefState<ValueType>::isValid() const {
+            return !belief.empty();
+        }
+
+        template<typename ValueType>
+        std::string SparseBeliefState<ValueType>::toString() const {
+            std::stringstream sstr;
+            bool first = true;
+            for (auto const& beliefentry : belief) {
+                if (!first) {
+                    sstr << ", ";
+                } else {
+                    first = false;
+                }
+                sstr << beliefentry.first << " : " << beliefentry.second;
+            }
+            return sstr.str();
+        }
+
+        template<typename ValueType>
+        bool operator==(SparseBeliefState<ValueType> const& lhs, SparseBeliefState<ValueType> const& rhs) {
+            return lhs.hash() == rhs.hash() && lhs.belief == rhs.belief;
+        }
+
+        template<typename ValueType>
+        SparseBeliefState<ValueType> SparseBeliefState<ValueType>::update(uint64_t action, uint32_t observation) const {
+            auto const& pomdp = manager->getPomdp();
+            std::map<uint64_t, ValueType> newBelief;
+            ValueType sum = storm::utility::zero<ValueType>();
+            for (auto const& beliefentry : belief) {
+                assert(pomdp.getNumberOfChoices(beliefentry.first) > action);
+                auto row = pomdp.getNondeterministicChoiceIndices()[beliefentry.first] + action;
+                for (auto const& transition : pomdp.getTransitionMatrix().getRow(row)) {
+                    // Only successors that emit the given observation are kept.
+                    if (observation != pomdp.getObservation(transition.getColumn())) {
+                        continue;
+                    }
+                    ValueType const mass = transition.getValue() * beliefentry.second;
+                    // Insert the successor or, if it is already present, add to its mass (single map lookup).
+                    auto insertion = newBelief.emplace(transition.getColumn(), mass);
+                    if (!insertion.second) {
+                        insertion.first->second += mass;
+                    }
+                    sum += mass;
+                }
+            }
+            // Normalize the collected mass; hash and risk are computed on the normalized values.
+            std::size_t newHash = 0;
+            ValueType newRisk = storm::utility::zero<ValueType>();
+            for (auto& entry : newBelief) {
+                assert(!storm::utility::isZero(sum));
+                entry.second /= sum;
+                boost::hash_combine(newHash, std::hash<ValueType>()(entry.second));
+                boost::hash_combine(newHash, entry.first);
+                newRisk += entry.second * manager->getRisk(entry.first);
+            }
+            return SparseBeliefState<ValueType>(manager, newBelief, newHash, newRisk);
+        }
+
+        template<typename ValueType, typename BeliefState>
+        NondeterministicBeliefTracker<ValueType, BeliefState>::NondeterministicBeliefTracker(storm::models::sparse::Pomdp<ValueType> const& pomdp) :
+        pomdp(pomdp), manager(std::make_shared<BeliefStateManager<ValueType>>(pomdp)), beliefs() {
+            // Intentionally left empty
+        }
+
+        template<typename ValueType, typename BeliefState>
+        bool NondeterministicBeliefTracker<ValueType, BeliefState>::reset(uint32_t observation) {
+            // Drop beliefs of a previous run; otherwise they would be mixed with the initial beliefs.
+            beliefs.clear();
+            bool hit = false;
+            for (auto state : pomdp.getInitialStates()) {
+                if (observation == pomdp.getObservation(state)) {
+                    hit = true;
+                    beliefs.emplace(manager, state);
+                }
+            }
+            lastObservation = observation;
+            return hit;
+        }
+
+        template<typename ValueType, typename BeliefState>
+        bool NondeterministicBeliefTracker<ValueType, BeliefState>::track(uint64_t newObservation) {
+            STORM_LOG_THROW(!beliefs.empty(), storm::exceptions::InvalidOperationException, "Cannot track without a belief (need to reset).");
+            // Observations are stored as uint32_t everywhere else, so the narrowing is made explicit here.
+            uint32_t const observation = static_cast<uint32_t>(newObservation);
+            std::unordered_set<BeliefState> newBeliefs;
+            for (uint64_t action = 0; action < manager->getActionsForObservation(lastObservation); ++action) {
+                for (auto const& belief : beliefs) {
+                    auto newBelief = belief.update(action, observation);
+                    if (newBelief.isValid()) {
+                        newBeliefs.insert(newBelief);
+                    }
+                }
+            }
+            beliefs = std::move(newBeliefs);
+            lastObservation = observation;
+            return !beliefs.empty();
+        }
+
+        template<typename ValueType, typename BeliefState>
+        ValueType NondeterministicBeliefTracker<ValueType, BeliefState>::getCurrentRisk(bool max) {
+            STORM_LOG_THROW(!beliefs.empty(), storm::exceptions::InvalidOperationException, "Risk is only defined for beliefs (run reset() first).");
+            ValueType result = beliefs.begin()->getRisk();
+            if (max) {
+                for (auto const& belief : beliefs) {
+                    if (belief.getRisk() > result) {
+                        result = belief.getRisk();
+                    }
+                }
+            } else {
+                for (auto const& belief : beliefs) {
+                    if (belief.getRisk() < result) {
+                        result = belief.getRisk();
+                    }
+                }
+            }
+            return result;
+        }
+
+        template<typename ValueType, typename BeliefState>
+        void NondeterministicBeliefTracker<ValueType, BeliefState>::setRisk(std::vector<ValueType> const& risk) {
+            manager->setRiskPerState(risk);
+        }
+
+        template<typename ValueType, typename BeliefState>
+        std::unordered_set<BeliefState> const& NondeterministicBeliefTracker<ValueType, BeliefState>::getCurrentBeliefs() const {
+            return beliefs;
+        }
+
+        template<typename ValueType, typename BeliefState>
+        uint32_t NondeterministicBeliefTracker<ValueType, BeliefState>::getCurrentObservation() const {
+            return lastObservation;
+        }
+
+        template class SparseBeliefState<double>;
+        template bool operator==(SparseBeliefState<double> const&, SparseBeliefState<double> const&);
+        template class NondeterministicBeliefTracker<double, SparseBeliefState<double>>;
+
+    }
+}
diff --git a/src/storm-pomdp/generator/NondeterministicBeliefTracker.h b/src/storm-pomdp/generator/NondeterministicBeliefTracker.h
new file mode 100644
index 000000000..4712f38c6
--- /dev/null
+++ b/src/storm-pomdp/generator/NondeterministicBeliefTracker.h
@@ -0,0 +1,123 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "storm/models/sparse/Pomdp.h"
+
+namespace storm {
+    namespace generator {
+        /*!
+         * Holds the data shared by all belief states of one POMDP: the model itself, a risk value per state
+         * and the number of actions per observation.
+         * Only a reference to the POMDP is stored, so the POMDP has to outlive the manager.
+         */
+        template<typename ValueType>
+        class BeliefStateManager {
+        public:
+            BeliefStateManager(storm::models::sparse::Pomdp<ValueType> const& pomdp);
+            storm::models::sparse::Pomdp<ValueType> const& getPomdp() const;
+            /*! Returns the number of actions recorded for the given observation. */
+            uint64_t getActionsForObservation(uint32_t observation) const;
+            /*! Returns the risk of the given state; setRiskPerState() must have provided an entry for it. */
+            ValueType getRisk(uint64_t) const;
+            void setRiskPerState(std::vector<ValueType> const& risk);
+        private:
+            storm::models::sparse::Pomdp<ValueType> const& pomdp;
+            std::vector<ValueType> riskPerState;
+            std::vector<uint64_t> numberActionsPerObservation;
+        };
+
+        template<typename ValueType>
+        class SparseBeliefState;
+        template<typename ValueType>
+        bool operator==(SparseBeliefState<ValueType> const& lhs, SparseBeliefState<ValueType> const& rhs);
+
+        /*!
+         * A belief stored as an ordered map from states to values, together with a precomputed hash and the
+         * risk of the belief (the sum of the state risks weighted with the belief values).
+         */
+        template<typename ValueType>
+        class SparseBeliefState {
+        public:
+            /*! Creates the belief that assigns value one to the given state. */
+            SparseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, uint64_t state);
+            /*!
+             * Computes the successor belief under the given action, restricted to successor states with the
+             * given observation and normalized. The result is empty (not valid) if there is no such successor.
+             */
+            SparseBeliefState update(uint64_t action, uint32_t observation) const;
+            std::size_t hash() const noexcept;
+            ValueType get(uint64_t state) const;
+            ValueType getRisk() const;
+            std::string toString() const;
+            /*! A belief is valid iff it contains at least one state. */
+            bool isValid() const;
+
+            friend bool operator==<>(SparseBeliefState<ValueType> const& lhs, SparseBeliefState<ValueType> const& rhs);
+        private:
+            SparseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, std::map<uint64_t, ValueType> const& belief, std::size_t newHash, ValueType const& risk);
+            std::shared_ptr<BeliefStateManager<ValueType>> manager;
+
+            std::map<uint64_t, ValueType> belief; // map is ordered for unique hashing.
+            std::size_t prestoredhash = 0;
+            ValueType risk;
+        };
+
+        // Note: only declared so far; this patch does not define any of its members.
+        template<typename ValueType>
+        class ObservationDenseBeliefState {
+        public:
+            ObservationDenseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, uint64_t state);
+            ObservationDenseBeliefState update(uint64_t action, uint32_t observation) const;
+        private:
+            std::shared_ptr<BeliefStateManager<ValueType>> manager;
+            std::unordered_map<uint64_t, ValueType> belief;
+
+            void normalize();
+        };
+
+        /*!
+         * Maintains the set of beliefs that are possible after a sequence of observations, considering every
+         * action that is available under the last observation in each step.
+         * Only a reference to the POMDP is stored, so the POMDP has to outlive the tracker.
+         */
+        template<typename ValueType, typename BeliefState>
+        class NondeterministicBeliefTracker {
+        public:
+            NondeterministicBeliefTracker(storm::models::sparse::Pomdp<ValueType> const& pomdp);
+            /*! Restarts with the initial states that have the given observation; returns whether there is one. */
+            bool reset(uint32_t observation);
+            /*! Updates all beliefs with the new observation; returns whether any belief remains. */
+            bool track(uint64_t newObservation);
+            std::unordered_set<BeliefState> const& getCurrentBeliefs() const;
+            uint32_t getCurrentObservation() const;
+            /*! Returns the maximal (max == true) or minimal risk among the current beliefs. */
+            ValueType getCurrentRisk(bool max=true);
+            void setRisk(std::vector<ValueType> const& risk);
+
+        private:
+            storm::models::sparse::Pomdp<ValueType> const& pomdp;
+            std::shared_ptr<BeliefStateManager<ValueType>> manager;
+            std::unordered_set<BeliefState> beliefs;
+            uint32_t lastObservation = 0;
+        };
+    }
+}
+
+// Makes SparseBeliefState usable in unordered containers by forwarding to its precomputed hash.
+namespace std {
+    template<typename T>
+    struct hash<storm::generator::SparseBeliefState<T>> {
+        std::size_t operator()(storm::generator::SparseBeliefState<T> const& s) const noexcept {
+            return s.hash();
+        }
+    };
+}