Browse Source

dense belief states

tempestpy_adaptions
Sebastian Junges 4 years ago
parent
commit
d2e36e9db3
  1. 246
      src/storm-pomdp/generator/NondeterministicBeliefTracker.cpp
  2. 45
      src/storm-pomdp/generator/NondeterministicBeliefTracker.h

246
src/storm-pomdp/generator/NondeterministicBeliefTracker.cpp

@ -1,6 +1,8 @@
#include "storm-pomdp/generator/NondeterministicBeliefTracker.h"
#include "storm/utility/ConstantsComparator.h"
#include "storm/storage/geometry/nativepolytopeconversion/QuickHull.h"
#include "storm/utility/vector.h"
namespace storm {
namespace generator {
@ -10,11 +12,24 @@ namespace storm {
: pomdp(pomdp)
{
numberActionsPerObservation = std::vector<uint64_t>(pomdp.getNrObservations(), 0);
statePerObservationAndOffset = std::vector<std::vector<uint64_t>>(pomdp.getNrObservations(), std::vector<uint64_t>());
for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
numberActionsPerObservation[pomdp.getObservation(state)] = pomdp.getNumberOfChoices(state);
statePerObservationAndOffset[pomdp.getObservation(state)].push_back(state);
observationOffsetId.push_back(statePerObservationAndOffset[pomdp.getObservation(state)].size() - 1);
}
}
template<typename ValueType>
uint32_t BeliefStateManager<ValueType>::getObservation(uint64_t state) const {
return pomdp.getObservation(state);
}
template<typename ValueType>
uint64_t BeliefStateManager<ValueType>::getNumberOfStates() const {
return pomdp.getNumberOfStates();
}
template<typename ValueType>
uint64_t BeliefStateManager<ValueType>::getActionsForObservation(uint32_t observation) const {
return numberActionsPerObservation[observation];
@ -38,10 +53,28 @@ namespace storm {
template<typename ValueType>
uint64_t BeliefStateManager<ValueType>::getFreshId() {
beliefIdCounter++;
std::cout << "provide " << beliefIdCounter;
return beliefIdCounter;
}
template<typename ValueType>
uint64_t BeliefStateManager<ValueType>::getObservationOffset(uint64_t state) const {
STORM_LOG_ASSERT(state < observationOffsetId.size(), "State " << state << " not a state id");
return observationOffsetId[state];
}
template<typename ValueType>
uint64_t BeliefStateManager<ValueType>::numberOfStatesPerObservation(uint32_t observation) const {
STORM_LOG_ASSERT(observation < observationOffsetId.size(), "Observation " << observation << " not an observation id");
return statePerObservationAndOffset[observation].size();
}
template<typename ValueType>
uint64_t BeliefStateManager<ValueType>::getState(uint32_t obs, uint64_t offset) const {
STORM_LOG_ASSERT(obs < statePerObservationAndOffset.size(), "Obs " << obs << " not a known observatoin");
STORM_LOG_ASSERT(offset < statePerObservationAndOffset[obs].size(), "Offset " << offset << " too high for observation " << obs);
return statePerObservationAndOffset[obs][offset];
}
template<typename ValueType>
SparseBeliefState<ValueType>::SparseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, uint64_t state)
: manager(manager), belief(), id(0), prevId(0)
@ -155,6 +188,21 @@ namespace storm {
updateHelper({{}}, {storm::utility::zero<ValueType>()}, belief.begin(), newObservation, previousBeliefs);
}
template<typename ValueType>
Eigen::Matrix<ValueType, Eigen::Dynamic, 1> SparseBeliefState<ValueType>::toEigenVector(storm::storage::BitVector const& support) const {
assert(false);
}
template<typename ValueType>
uint64_t SparseBeliefState<ValueType>::getSupportSize() const {
return manager->getNumberOfStates();
}
template<typename ValueType>
void SparseBeliefState<ValueType>::setSupport(storm::storage::BitVector& support) const {
assert(false);
}
template<typename ValueType>
void SparseBeliefState<ValueType>::updateHelper(std::vector<std::map<uint64_t, ValueType>> const& partialBeliefs, std::vector<ValueType> const& sums, typename std::map<uint64_t, ValueType>::const_iterator nextStateIt, uint32_t newObservation, std::unordered_set<SparseBeliefState<ValueType>>& previousBeliefs) const {
if(nextStateIt == belief.end()) {
@ -210,6 +258,164 @@ namespace storm {
}
}
template<typename ValueType>
bool operator==(ObservationDenseBeliefState<ValueType> const& lhs, ObservationDenseBeliefState<ValueType> const& rhs) {
if (lhs.hash() != rhs.hash()) {
return false;
}
if (lhs.observation != rhs.observation) {
return false;
}
storm::utility::ConstantsComparator<ValueType> cmp(0.00001, true);
auto lhsIt = lhs.belief.begin();
auto rhsIt = rhs.belief.begin();
while(lhsIt != lhs.belief.end()) {
if (!cmp.isEqual(*lhsIt, *rhsIt)) {
return false;
}
++lhsIt;
++rhsIt;
}
return true;
//return std::equal(lhs.belief.begin(), lhs.belief.end(), rhs.belief.begin());
}
template<typename ValueType>
ObservationDenseBeliefState<ValueType>::ObservationDenseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, uint64_t state)
: manager(manager), belief(manager->numberOfStatesPerObservation(manager->getObservation(state)))
{
observation = manager->getObservation(state);
belief[manager->getObservationOffset(state)] = storm::utility::one<ValueType>();
}
template<typename ValueType>
ObservationDenseBeliefState<ValueType>::ObservationDenseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, uint32_t observation, std::vector<ValueType> const& belief, std::size_t newHash, ValueType const& risk, uint64_t prevId)
: manager(manager), belief(belief), observation(observation), prestoredhash(newHash), risk(risk), id(manager->getFreshId()), prevId(prevId)
{
// Intentionally left empty.
}
template<typename ValueType>
void ObservationDenseBeliefState<ValueType>::update(uint32_t newObservation, std::unordered_set<ObservationDenseBeliefState>& previousBeliefs) const {
updateHelper({{}}, {storm::utility::zero<ValueType>()}, 0, newObservation, previousBeliefs);
}
template<typename ValueType>
void ObservationDenseBeliefState<ValueType>::updateHelper(std::vector<std::map<uint64_t, ValueType>> const& partialBeliefs, std::vector<ValueType> const& sums, uint64_t currentEntry, uint32_t newObservation, std::unordered_set<ObservationDenseBeliefState<ValueType>>& previousBeliefs) const {
while(currentEntry != belief.size() && storm::utility::isZero(belief[currentEntry])) {
currentEntry++;
}
if(currentEntry == belief.size()) {
for (uint64_t i = 0; i < partialBeliefs.size(); ++i) {
auto const& partialBelief = partialBeliefs[i];
auto const& sum = sums[i];
if (storm::utility::isZero(sum)) {
continue;
}
std::size_t newHash = 0;
ValueType risk = storm::utility::zero<ValueType>();
std::vector<ValueType> finalBelief(manager->numberOfStatesPerObservation(observation), storm::utility::zero<ValueType>());
for (auto &entry : partialBelief) {
assert(!storm::utility::isZero(sum));
finalBelief[manager->getObservationOffset(entry.first)] = (entry.second / sum);
//boost::hash_combine(newHash, std::hash<ValueType>()(entry.second));
boost::hash_combine(newHash, entry.first);
risk += entry.second / sum * manager->getRisk(entry.first);
}
previousBeliefs.insert(ObservationDenseBeliefState<ValueType>(manager, newObservation, finalBelief, newHash, risk, id));
}
} else {
uint64_t state = manager->getState(observation,currentEntry);
uint64_t nextEntry = currentEntry + 1;
std::vector<std::map<uint64_t, ValueType>> newPartialBeliefs;
std::vector<ValueType> newSums;
for (uint64_t i = 0; i < partialBeliefs.size(); ++i) {
for (auto row = manager->getPomdp().getNondeterministicChoiceIndices()[state];
row < manager->getPomdp().getNondeterministicChoiceIndices()[state + 1]; ++row) {
std::map<uint64_t, ValueType> newPartialBelief = partialBeliefs[i];
ValueType newSum = sums[i];
for (auto const &transition : manager->getPomdp().getTransitionMatrix().getRow(row)) {
if (newObservation != manager->getPomdp().getObservation(transition.getColumn())) {
continue;
}
if (newPartialBelief.count(transition.getColumn()) == 0) {
newPartialBelief[transition.getColumn()] = transition.getValue() * belief[currentEntry];
} else {
newPartialBelief[transition.getColumn()] += transition.getValue() * belief[currentEntry];
}
newSum += transition.getValue() * belief[currentEntry];
}
newPartialBeliefs.push_back(newPartialBelief);
newSums.push_back(newSum);
}
}
updateHelper(newPartialBeliefs, newSums, nextEntry, newObservation, previousBeliefs);
}
}
template<typename ValueType>
std::size_t ObservationDenseBeliefState<ValueType>::hash() const noexcept {
return prestoredhash;
}
template<typename ValueType>
ValueType ObservationDenseBeliefState<ValueType>::get(uint64_t state) const {
if (manager->getObservation(state) != state) {
return storm::utility::zero<ValueType>();
}
return belief[manager->getObservationOffset(state)];
}
template<typename ValueType>
ValueType ObservationDenseBeliefState<ValueType>::getRisk() const {
return risk;
}
template<typename ValueType>
Eigen::Matrix<ValueType, Eigen::Dynamic, 1> ObservationDenseBeliefState<ValueType>::toEigenVector(storm::storage::BitVector const& support) const {
return storm::adapters::EigenAdapter::toEigenVector(storm::utility::vector::filterVector(belief, support));
}
template<typename ValueType>
uint64_t ObservationDenseBeliefState<ValueType>::getSupportSize() const {
return belief.size();
}
template<typename ValueType>
void ObservationDenseBeliefState<ValueType>::setSupport(storm::storage::BitVector& support) const {
storm::utility::vector::setNonzeroIndices(belief, support);
std::cout << "Belief is " << storm::utility::vector::toString(belief) << std::endl;
std::cout << "Support is now " << support << std::endl;
}
template<typename ValueType>
std::string ObservationDenseBeliefState<ValueType>::toString() const {
std::stringstream sstr;
sstr << "id: " << id << "; ";
bool first = true;
uint64_t i = 0;
for (auto const& beliefentry : belief) {
if (!storm::utility::isZero(beliefentry)) {
if (!first) {
sstr << ", ";
} else {
first = false;
}
sstr << manager->getState(observation, i) << " : " << beliefentry;
}
i++;
}
sstr << " (from " << prevId << ")";
return sstr.str();
}
template<typename ValueType, typename BeliefState>
NondeterministicBeliefTracker<ValueType, BeliefState>::NondeterministicBeliefTracker(storm::models::sparse::Pomdp<ValueType> const& pomdp) :
@ -279,9 +485,47 @@ namespace storm {
return lastObservation;
}
//
// template<typename ValueType, typename BeliefState>
// void NondeterministicBeliefTracker<ValueType, BeliefState>::reduce() {
// std::cout << "reduce" << std::endl;
// storm::storage::BitVector support(beliefs.begin()->getSupportSize());
// std::cout << "supp generated" << std::endl;
// for(auto const& belief : beliefs) {
// belief.setSupport(support);
// }
// std::cout << "Support:" << support << std::endl;
//
// if (beliefs.size() <= support.getNumberOfSetBits()) {
// return;
// }
//
// std::vector<typename storm::storage::geometry::QuickHull<ValueType>::EigenVector> points;
// for(auto const& bel : beliefs) {
// std::cout << bel.toEigenVector(support) << std::endl;
// points.push_back(bel.toEigenVector(support));
// }
//
// storm::storage::geometry::QuickHull<ValueType> qh;
// std::cout << "Run QH (dim=" << points[0].size() << ")" << std::endl;
// qh.generateHalfspacesFromPoints(points, true);
// std::cout << "End QH" << std::endl;
// auto filteredPoints = qh.getRelevantVertices();
// //if (filteredPoints.size() < points.size()) {
// // for(auto const& fp : filteredPoints) {
// // beliefs.emplace(manager,filteredPoints);
// // }
// //}
// }
template class SparseBeliefState<double>;
template bool operator==(SparseBeliefState<double> const&, SparseBeliefState<double> const&);
template class NondeterministicBeliefTracker<double, SparseBeliefState<double>>;
template class ObservationDenseBeliefState<double>;
template bool operator==(ObservationDenseBeliefState<double> const&, ObservationDenseBeliefState<double> const&);
template class NondeterministicBeliefTracker<double, ObservationDenseBeliefState<double>>;
}
}

45
src/storm-pomdp/generator/NondeterministicBeliefTracker.h

@ -1,5 +1,6 @@
#pragma once
#include "storm/models/sparse/Pomdp.h"
#include "storm/adapters/EigenAdapter.h"
namespace storm {
namespace generator {
@ -12,11 +13,19 @@ namespace storm {
ValueType getRisk(uint64_t) const;
void setRiskPerState(std::vector<ValueType> const& risk);
uint64_t getFreshId();
uint32_t getObservation(uint64_t state) const;
uint64_t getObservationOffset(uint64_t state) const;
uint64_t getState(uint32_t obs, uint64_t offset) const;
uint64_t getNumberOfStates() const;
uint64_t numberOfStatesPerObservation(uint32_t observation) const;
private:
storm::models::sparse::Pomdp<ValueType> const& pomdp;
std::vector<ValueType> riskPerState;
std::vector<uint64_t> numberActionsPerObservation;
uint64_t beliefIdCounter = 0;
std::vector<uint64_t> observationOffsetId;
std::vector<std::vector<uint64_t>> statePerObservationAndOffset;
};
template<typename ValueType>
@ -34,6 +43,9 @@ namespace storm {
ValueType getRisk() const;
std::string toString() const;
bool isValid() const;
Eigen::Matrix<ValueType, Eigen::Dynamic, 1> toEigenVector(storm::storage::BitVector const& support) const;
uint64_t getSupportSize() const;
void setSupport(storm::storage::BitVector&) const;
friend bool operator==<>(SparseBeliefState<ValueType> const& lhs, SparseBeliefState<ValueType> const& rhs);
private:
@ -46,20 +58,38 @@ namespace storm {
ValueType risk;
uint64_t id;
uint64_t prevId;
};
template<typename ValueType>
class ObservationDenseBeliefState;
template<typename ValueType>
bool operator==(ObservationDenseBeliefState<ValueType> const& lhs, ObservationDenseBeliefState<ValueType> const& rhs);
template<typename ValueType>
class ObservationDenseBeliefState {
public:
ObservationDenseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, uint64_t state);
ObservationDenseBeliefState update(uint64_t action, uint32_t observation) const;
void update(uint32_t newObservation, std::unordered_set<ObservationDenseBeliefState>& previousBeliefs) const;
std::size_t hash() const noexcept;
ValueType get(uint64_t state) const;
ValueType getRisk() const;
std::string toString() const;
Eigen::Matrix<ValueType, Eigen::Dynamic, 1> toEigenVector(storm::storage::BitVector const& support) const;
uint64_t getSupportSize() const;
void setSupport(storm::storage::BitVector&) const;
friend bool operator==<>(ObservationDenseBeliefState<ValueType> const& lhs, ObservationDenseBeliefState<ValueType> const& rhs);
private:
void updateHelper(std::vector<std::map<uint64_t, ValueType>> const& partialBeliefs, std::vector<ValueType> const& sums, uint64_t currentEntry, uint32_t newObservation, std::unordered_set<ObservationDenseBeliefState<ValueType>>& previousBeliefs) const;
ObservationDenseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, uint32_t observation, std::vector<ValueType> const& belief, std::size_t newHash, ValueType const& risk, uint64_t prevId);
std::shared_ptr<BeliefStateManager<ValueType>> manager;
std::unordered_map<uint64_t, ValueType> belief;
void normalize();
std::vector<ValueType> belief;
uint64_t observation = 0;
std::size_t prestoredhash = 0;
ValueType risk;
uint64_t id;
uint64_t prevId;
};
template<typename ValueType, typename BeliefState>
@ -72,6 +102,7 @@ namespace storm {
uint32_t getCurrentObservation() const;
ValueType getCurrentRisk(bool max=true);
void setRisk(std::vector<ValueType> const& risk);
//void reduce();
private:
@ -91,4 +122,10 @@ namespace std {
return s.hash();
}
};
template<typename T>
struct hash<storm::generator::ObservationDenseBeliefState<T>> {
std::size_t operator()(storm::generator::ObservationDenseBeliefState<T> const& s) const noexcept {
return s.hash();
}
};
}
Loading…
Cancel
Save