From d2e36e9db35e853453b0fcc6f425075f3236562f Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Mon, 14 Sep 2020 14:24:12 -0700 Subject: [PATCH] dense belief states --- .../NondeterministicBeliefTracker.cpp | 246 +++++++++++++++++- .../generator/NondeterministicBeliefTracker.h | 45 +++- 2 files changed, 286 insertions(+), 5 deletions(-) diff --git a/src/storm-pomdp/generator/NondeterministicBeliefTracker.cpp b/src/storm-pomdp/generator/NondeterministicBeliefTracker.cpp index 065b5f127..e330b7f90 100644 --- a/src/storm-pomdp/generator/NondeterministicBeliefTracker.cpp +++ b/src/storm-pomdp/generator/NondeterministicBeliefTracker.cpp @@ -1,6 +1,8 @@ #include "storm-pomdp/generator/NondeterministicBeliefTracker.h" #include "storm/utility/ConstantsComparator.h" +#include "storm/storage/geometry/nativepolytopeconversion/QuickHull.h" +#include "storm/utility/vector.h" namespace storm { namespace generator { @@ -10,11 +12,24 @@ namespace storm { : pomdp(pomdp) { numberActionsPerObservation = std::vector(pomdp.getNrObservations(), 0); + statePerObservationAndOffset = std::vector>(pomdp.getNrObservations(), std::vector()); for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { numberActionsPerObservation[pomdp.getObservation(state)] = pomdp.getNumberOfChoices(state); + statePerObservationAndOffset[pomdp.getObservation(state)].push_back(state); + observationOffsetId.push_back(statePerObservationAndOffset[pomdp.getObservation(state)].size() - 1); } } + template + uint32_t BeliefStateManager::getObservation(uint64_t state) const { + return pomdp.getObservation(state); + } + + template + uint64_t BeliefStateManager::getNumberOfStates() const { + return pomdp.getNumberOfStates(); + } + template uint64_t BeliefStateManager::getActionsForObservation(uint32_t observation) const { return numberActionsPerObservation[observation]; @@ -38,10 +53,28 @@ namespace storm { template uint64_t BeliefStateManager::getFreshId() { beliefIdCounter++; - std::cout << "provide " << beliefIdCounter; return beliefIdCounter; } + template + uint64_t BeliefStateManager::getObservationOffset(uint64_t state) const { + STORM_LOG_ASSERT(state < observationOffsetId.size(), "State " << state << " not a state id"); + return observationOffsetId[state]; + } + + template + uint64_t BeliefStateManager::numberOfStatesPerObservation(uint32_t observation) const { + STORM_LOG_ASSERT(observation < observationOffsetId.size(), "Observation " << observation << " not an observation id"); + return statePerObservationAndOffset[observation].size(); + } + + template + uint64_t BeliefStateManager::getState(uint32_t obs, uint64_t offset) const { + STORM_LOG_ASSERT(obs < statePerObservationAndOffset.size(), "Obs " << obs << " not a known observatoin"); + STORM_LOG_ASSERT(offset < statePerObservationAndOffset[obs].size(), "Offset " << offset << " too high for observation " << obs); + return statePerObservationAndOffset[obs][offset]; + } + template SparseBeliefState::SparseBeliefState(std::shared_ptr> const& manager, uint64_t state) : manager(manager), belief(), id(0), prevId(0) @@ -155,6 +188,21 @@ namespace storm { updateHelper({{}}, {storm::utility::zero()}, belief.begin(), newObservation, previousBeliefs); } + template + Eigen::Matrix SparseBeliefState::toEigenVector(storm::storage::BitVector const& support) const { + assert(false); + } + + template + uint64_t SparseBeliefState::getSupportSize() const { + return manager->getNumberOfStates(); + } + + template + void SparseBeliefState::setSupport(storm::storage::BitVector& support) const { + assert(false); + } + template void SparseBeliefState::updateHelper(std::vector> const& partialBeliefs, std::vector const& sums, typename std::map::const_iterator nextStateIt, uint32_t newObservation, std::unordered_set>& previousBeliefs) const { if(nextStateIt == belief.end()) { @@ -210,6 +258,164 @@ namespace storm { } } + template + bool operator==(ObservationDenseBeliefState const& lhs, ObservationDenseBeliefState const& rhs) { + if (lhs.hash() != rhs.hash()) { + return false; + } + if (lhs.observation != rhs.observation) { + return false; + } + storm::utility::ConstantsComparator cmp(0.00001, true); + auto lhsIt = lhs.belief.begin(); + auto rhsIt = rhs.belief.begin(); + while(lhsIt != lhs.belief.end()) { + if (!cmp.isEqual(*lhsIt, *rhsIt)) { + return false; + } + ++lhsIt; + ++rhsIt; + } + return true; + //return std::equal(lhs.belief.begin(), lhs.belief.end(), rhs.belief.begin()); + } + + template + ObservationDenseBeliefState::ObservationDenseBeliefState(std::shared_ptr> const& manager, uint64_t state) + : manager(manager), belief(manager->numberOfStatesPerObservation(manager->getObservation(state))) + { + observation = manager->getObservation(state); + belief[manager->getObservationOffset(state)] = storm::utility::one(); + } + + template + ObservationDenseBeliefState::ObservationDenseBeliefState(std::shared_ptr> const& manager, uint32_t observation, std::vector const& belief, std::size_t newHash, ValueType const& risk, uint64_t prevId) + : manager(manager), belief(belief), observation(observation), prestoredhash(newHash), risk(risk), id(manager->getFreshId()), prevId(prevId) + { + // Intentionally left empty. + } + + + template + void ObservationDenseBeliefState::update(uint32_t newObservation, std::unordered_set& previousBeliefs) const { + updateHelper({{}}, {storm::utility::zero()}, 0, newObservation, previousBeliefs); + } + + template + void ObservationDenseBeliefState::updateHelper(std::vector> const& partialBeliefs, std::vector const& sums, uint64_t currentEntry, uint32_t newObservation, std::unordered_set>& previousBeliefs) const { + while(currentEntry != belief.size() && storm::utility::isZero(belief[currentEntry])) { + currentEntry++; + } + if(currentEntry == belief.size()) { + for (uint64_t i = 0; i < partialBeliefs.size(); ++i) { + auto const& partialBelief = partialBeliefs[i]; + auto const& sum = sums[i]; + if (storm::utility::isZero(sum)) { + continue; + } + std::size_t newHash = 0; + ValueType risk = storm::utility::zero(); + std::vector finalBelief(manager->numberOfStatesPerObservation(observation), storm::utility::zero()); + for (auto &entry : partialBelief) { + assert(!storm::utility::isZero(sum)); + finalBelief[manager->getObservationOffset(entry.first)] = (entry.second / sum); + //boost::hash_combine(newHash, std::hash()(entry.second)); + boost::hash_combine(newHash, entry.first); + risk += entry.second / sum * manager->getRisk(entry.first); + } + previousBeliefs.insert(ObservationDenseBeliefState(manager, newObservation, finalBelief, newHash, risk, id)); + } + } else { + uint64_t state = manager->getState(observation,currentEntry); + uint64_t nextEntry = currentEntry + 1; + std::vector> newPartialBeliefs; + std::vector newSums; + for (uint64_t i = 0; i < partialBeliefs.size(); ++i) { + + for (auto row = manager->getPomdp().getNondeterministicChoiceIndices()[state]; + row < manager->getPomdp().getNondeterministicChoiceIndices()[state + 1]; ++row) { + std::map newPartialBelief = partialBeliefs[i]; + ValueType newSum = sums[i]; + for (auto const &transition : manager->getPomdp().getTransitionMatrix().getRow(row)) { + if (newObservation != manager->getPomdp().getObservation(transition.getColumn())) { + continue; + } + + if (newPartialBelief.count(transition.getColumn()) == 0) { + newPartialBelief[transition.getColumn()] = transition.getValue() * belief[currentEntry]; + } else { + newPartialBelief[transition.getColumn()] += transition.getValue() * belief[currentEntry]; + } + newSum += transition.getValue() * belief[currentEntry]; + + } + newPartialBeliefs.push_back(newPartialBelief); + newSums.push_back(newSum); + } + } + updateHelper(newPartialBeliefs, newSums, nextEntry, newObservation, previousBeliefs); + } + } + + template + std::size_t ObservationDenseBeliefState::hash() const noexcept { + return prestoredhash; + } + + template + ValueType ObservationDenseBeliefState::get(uint64_t state) const { + if (manager->getObservation(state) != state) { + return storm::utility::zero(); + } + return belief[manager->getObservationOffset(state)]; + } + + template + ValueType ObservationDenseBeliefState::getRisk() const { + return risk; + } + + template + Eigen::Matrix ObservationDenseBeliefState::toEigenVector(storm::storage::BitVector const& support) const { + return storm::adapters::EigenAdapter::toEigenVector(storm::utility::vector::filterVector(belief, support)); + } + + template + uint64_t ObservationDenseBeliefState::getSupportSize() const { + return belief.size(); + } + + template + void ObservationDenseBeliefState::setSupport(storm::storage::BitVector& support) const { + storm::utility::vector::setNonzeroIndices(belief, support); + std::cout << "Belief is " << storm::utility::vector::toString(belief) << std::endl; + std::cout << "Support is now " << support << std::endl; + } + + + template + std::string ObservationDenseBeliefState::toString() const { + std::stringstream sstr; + sstr << "id: " << id << "; "; + bool first = true; + uint64_t i = 0; + for (auto const& beliefentry : belief) { + + if (!storm::utility::isZero(beliefentry)) { + if (!first) { + sstr << ", "; + } else { + first = false; + } + + sstr << manager->getState(observation, i) << " : " << beliefentry; + } + i++; + } + sstr << " (from " << prevId << ")"; + return sstr.str(); + } + template NondeterministicBeliefTracker::NondeterministicBeliefTracker(storm::models::sparse::Pomdp const& pomdp) : @@ -279,9 +485,47 @@ namespace storm { return lastObservation; } +// +// template +// void NondeterministicBeliefTracker::reduce() { +// std::cout << "reduce" << std::endl; +// storm::storage::BitVector support(beliefs.begin()->getSupportSize()); +// std::cout << "supp generated" << std::endl; +// for(auto const& belief : beliefs) { +// belief.setSupport(support); +// } +// std::cout << "Support:" << support << std::endl; +// +// if (beliefs.size() <= support.getNumberOfSetBits()) { +// return; +// } +// +// std::vector::EigenVector> points; +// for(auto const& bel : beliefs) { +// std::cout << bel.toEigenVector(support) << std::endl; +// points.push_back(bel.toEigenVector(support)); +// } +// +// storm::storage::geometry::QuickHull qh; +// std::cout << "Run QH (dim=" << points[0].size() << ")" << std::endl; +// qh.generateHalfspacesFromPoints(points, true); +// std::cout << "End QH" << std::endl; +// auto filteredPoints = qh.getRelevantVertices(); +// //if (filteredPoints.size() < points.size()) { +// // for(auto const& fp : filteredPoints) { +// // beliefs.emplace(manager,filteredPoints); +// // } +// //} +// } + + template class SparseBeliefState; template bool operator==(SparseBeliefState const&, SparseBeliefState const&); template class NondeterministicBeliefTracker>; + template class ObservationDenseBeliefState; + template bool operator==(ObservationDenseBeliefState const&, ObservationDenseBeliefState const&); + template class NondeterministicBeliefTracker>; + } } diff --git a/src/storm-pomdp/generator/NondeterministicBeliefTracker.h b/src/storm-pomdp/generator/NondeterministicBeliefTracker.h index 462a33a93..29f315322 100644 --- a/src/storm-pomdp/generator/NondeterministicBeliefTracker.h +++ b/src/storm-pomdp/generator/NondeterministicBeliefTracker.h @@ -1,5 +1,6 @@ #pragma once #include "storm/models/sparse/Pomdp.h" +#include "storm/adapters/EigenAdapter.h" namespace storm { namespace generator { @@ -12,11 +13,19 @@ namespace storm { ValueType getRisk(uint64_t) const; void setRiskPerState(std::vector const& risk); uint64_t getFreshId(); + uint32_t getObservation(uint64_t state) const; + uint64_t getObservationOffset(uint64_t state) const; + uint64_t getState(uint32_t obs, uint64_t offset) const; + uint64_t getNumberOfStates() const; + uint64_t numberOfStatesPerObservation(uint32_t observation) const; + private: storm::models::sparse::Pomdp const& pomdp; std::vector riskPerState; std::vector numberActionsPerObservation; uint64_t beliefIdCounter = 0; + std::vector observationOffsetId; + std::vector> statePerObservationAndOffset; }; template @@ -34,6 +43,9 @@ namespace storm { ValueType getRisk() const; std::string toString() const; bool isValid() const; + Eigen::Matrix toEigenVector(storm::storage::BitVector const& support) const; + uint64_t getSupportSize() const; + void setSupport(storm::storage::BitVector&) const; friend bool operator==<>(SparseBeliefState const& lhs, SparseBeliefState const& rhs); private: @@ -46,20 +58,38 @@ namespace storm { ValueType risk; uint64_t id; uint64_t prevId; - }; + template + class ObservationDenseBeliefState; + template + bool operator==(ObservationDenseBeliefState const& lhs, ObservationDenseBeliefState const& rhs); + template class ObservationDenseBeliefState { public: ObservationDenseBeliefState(std::shared_ptr> const& manager, uint64_t state); - ObservationDenseBeliefState update(uint64_t action, uint32_t observation) const; + void update(uint32_t newObservation, std::unordered_set& previousBeliefs) const; + std::size_t hash() const noexcept; + ValueType get(uint64_t state) const; + ValueType getRisk() const; + std::string toString() const; + Eigen::Matrix toEigenVector(storm::storage::BitVector const& support) const; + uint64_t getSupportSize() const; + void setSupport(storm::storage::BitVector&) const; + friend bool operator==<>(ObservationDenseBeliefState const& lhs, ObservationDenseBeliefState const& rhs); private: + void updateHelper(std::vector> const& partialBeliefs, std::vector const& sums, uint64_t currentEntry, uint32_t newObservation, std::unordered_set>& previousBeliefs) const; + ObservationDenseBeliefState(std::shared_ptr> const& manager, uint32_t observation, std::vector const& belief, std::size_t newHash, ValueType const& risk, uint64_t prevId); std::shared_ptr> manager; - std::unordered_map belief; - void normalize(); + std::vector belief; + uint64_t observation = 0; + std::size_t prestoredhash = 0; + ValueType risk; + uint64_t id; + uint64_t prevId; }; template @@ -72,6 +102,7 @@ namespace storm { uint32_t getCurrentObservation() const; ValueType getCurrentRisk(bool max=true); void setRisk(std::vector const& risk); + //void reduce(); private: @@ -91,4 +122,10 @@ namespace std { return s.hash(); } }; + template + struct hash> { + std::size_t operator()(storm::generator::ObservationDenseBeliefState const& s) const noexcept { + return s.hash(); + } + }; }