#include "storm-pomdp/generator/NondeterministicBeliefTracker.h"
#include "storm/utility/ConstantsComparator.h"
#include "storm/storage/geometry/nativepolytopeconversion/QuickHull.h"
#include "storm/storage/geometry/ReduceVertexCloud.h"
#include "storm/utility/vector.h"
#include "storm/utility/Stopwatch.h"

#include <utility>

// NOTE(review): the template argument lists in this file were reconstructed from how the
// values are used (ValueType probabilities, uint64_t state ids). Confirm them against the
// declarations in NondeterministicBeliefTracker.h.

namespace storm {
namespace generator {

/*!
 * Builds the per-observation lookup tables for the given POMDP:
 *  - numberActionsPerObservation[o]: number of choices of the last visited state with observation o,
 *  - statePerObservationAndOffset[o]: the states with observation o, in increasing state order,
 *  - observationOffsetId[s]: position of state s inside statePerObservationAndOffset[obs(s)].
 */
template<typename ValueType>
BeliefStateManager<ValueType>::BeliefStateManager(storm::models::sparse::Pomdp<ValueType> const& pomdp) : pomdp(pomdp) {
    numberActionsPerObservation.assign(pomdp.getNrObservations(), 0);
    statePerObservationAndOffset.clear();
    statePerObservationAndOffset.resize(pomdp.getNrObservations());
    for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
        auto const stateObservation = pomdp.getObservation(state);
        numberActionsPerObservation[stateObservation] = pomdp.getNumberOfChoices(state);
        statePerObservationAndOffset[stateObservation].push_back(state);
        // The state was just appended, so its offset is the last index of its observation bucket.
        observationOffsetId.push_back(statePerObservationAndOffset[stateObservation].size() - 1);
    }
}

/*! Returns the observation of the given state as stored in the POMDP. */
template<typename ValueType>
uint32_t BeliefStateManager<ValueType>::getObservation(uint64_t state) const {
    return pomdp.getObservation(state);
}

/*! Returns the number of states of the underlying POMDP. */
template<typename ValueType>
uint64_t BeliefStateManager<ValueType>::getNumberOfStates() const {
    return pomdp.getNumberOfStates();
}

/*! Returns the number of actions recorded for the given observation. */
template<typename ValueType>
uint64_t BeliefStateManager<ValueType>::getActionsForObservation(uint32_t observation) const {
    return numberActionsPerObservation[observation];
}

/*!
 * Returns the risk value stored for the given state.
 * Uses bounds-checked access, so this throws std::out_of_range if no risk vector covering
 * the state has been set via setRiskPerState().
 */
template<typename ValueType>
ValueType BeliefStateManager<ValueType>::getRisk(uint64_t state) const {
    return riskPerState.at(state);
}

/*! Returns the POMDP this manager was constructed for. */
template<typename ValueType>
storm::models::sparse::Pomdp<ValueType> const& BeliefStateManager<ValueType>::getPomdp() const {
    return pomdp;
}

/*! Replaces the per-state risk values (indexed by state id). */
template<typename ValueType>
void BeliefStateManager<ValueType>::setRiskPerState(std::vector<ValueType> const& risk) {
    riskPerState = risk;
}

/*! Increments the belief id counter and returns the new value. */
template<typename ValueType>
uint64_t BeliefStateManager<ValueType>::getFreshId() {
    beliefIdCounter++;
    return beliefIdCounter;
}

/*! Returns the position of the state among all states that share its observation. */
template<typename ValueType>
uint64_t BeliefStateManager<ValueType>::getObservationOffset(uint64_t state) const {
    STORM_LOG_ASSERT(state < observationOffsetId.size(), "State " << state << " not a state id");
    return observationOffsetId[state];
}

/*! Returns how many states carry the given observation. */
template<typename ValueType>
uint64_t BeliefStateManager<ValueType>::numberOfStatesPerObservation(uint32_t observation) const {
    // Bound the observation by the container that is actually indexed below
    // (observationOffsetId is indexed by state, not by observation).
    STORM_LOG_ASSERT(observation < statePerObservationAndOffset.size(), "Observation " << observation << " not an observation id");
    return statePerObservationAndOffset[observation].size();
}

/*! Returns the state that sits at the given offset within the given observation. */
template<typename ValueType>
uint64_t BeliefStateManager<ValueType>::getState(uint32_t obs, uint64_t offset) const {
    STORM_LOG_ASSERT(obs < statePerObservationAndOffset.size(), "Obs " << obs << " not a known observatoin");
    STORM_LOG_ASSERT(offset < statePerObservationAndOffset[obs].size(), "Offset " << offset << " too high for observation " << obs);
    return statePerObservationAndOffset[obs][offset];
}

/*!
 * Creates the Dirac belief on the given state. Takes a fresh id from the manager and
 * reads the state's risk from the manager (see BeliefStateManager::getRisk).
 */
template<typename ValueType>
SparseBeliefState<ValueType>::SparseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, uint64_t state)
    : manager(manager), belief(), prestoredhash(0), id(0), prevId(0) {
    // prestoredhash is zeroed explicitly so hash() never depends on a default that is not visible here.
    id = manager->getFreshId();
    belief[state] = storm::utility::one<ValueType>();
    risk = manager->getRisk(state);
}

/*!
 * Creates a belief from an already normalised distribution with precomputed hash and risk.
 * prevId is the id of the belief this one was derived from. A fresh id is taken from the manager.
 */
template<typename ValueType>
SparseBeliefState<ValueType>::SparseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, std::map<uint64_t, ValueType> const& belief,
                                                std::size_t hash, ValueType const& risk, uint64_t prevId)
    : manager(manager), belief(belief), prestoredhash(hash), risk(risk), id(0), prevId(prevId) {
    id = manager->getFreshId();
}

/*! Returns the probability of the given state; throws std::out_of_range if the state is not in the support. */
template<typename ValueType>
ValueType SparseBeliefState<ValueType>::get(uint64_t state) const {
    return belief.at(state);
}

/*! Returns the risk stored for this belief. */
template<typename ValueType>
ValueType SparseBeliefState<ValueType>::getRisk() const {
    return risk;
}

/*! Returns the hash that was stored at construction time. */
template<typename ValueType>
std::size_t SparseBeliefState<ValueType>::hash() const noexcept {
    return prestoredhash;
}

/*! A belief is valid iff its support is non-empty. */
template<typename ValueType>
bool SparseBeliefState<ValueType>::isValid() const {
    return !belief.empty();
}

/*! Renders the belief as "id: <id>; s0 : p0, s1 : p1 (from <prevId>)". */
template<typename ValueType>
std::string SparseBeliefState<ValueType>::toString() const {
    std::stringstream sstr;
    sstr << "id: " << id << "; ";
    bool first = true;
    for (auto const& beliefentry : belief) {
        if (!first) {
            sstr << ", ";
        } else {
            first = false;
        }
        sstr << beliefentry.first << " : " << beliefentry.second;
    }
    sstr << " (from " << prevId << ")";
    return sstr.str();
}

/*!
 * Two sparse beliefs are equal if their stored hashes and support sizes agree and every
 * entry has the same state id and a probability that the comparator considers equal.
 */
template<typename ValueType>
bool operator==(SparseBeliefState<ValueType> const& lhs, SparseBeliefState<ValueType> const& rhs) {
    if (lhs.hash() != rhs.hash()) {
        return false;
    }
    if (lhs.belief.size() != rhs.belief.size()) {
        return false;
    }
    // NOTE(review): presumably (precision, relative) -- confirm against ConstantsComparator.
    storm::utility::ConstantsComparator<ValueType> cmp(0.00001, true);
    auto lhsIt = lhs.belief.begin();
    auto rhsIt = rhs.belief.begin();
    // Both maps are ordered and have the same size, so they can be walked in lock-step.
    while (lhsIt != lhs.belief.end()) {
        if (lhsIt->first != rhsIt->first || !cmp.isEqual(lhsIt->second, rhsIt->second)) {
            return false;
        }
        ++lhsIt;
        ++rhsIt;
    }
    return true;
    // return std::equal(lhs.belief.begin(), lhs.belief.end(), rhs.belief.begin());
}

/*!
 * Inserts into previousBeliefs every successor belief that is consistent with newObservation.
 * Starts the recursion with a single empty partial belief whose probability mass is zero.
 */
template<typename ValueType>
void SparseBeliefState<ValueType>::update(uint32_t newObservation, std::unordered_set<SparseBeliefState<ValueType>>& previousBeliefs) const {
    updateHelper({{}}, {storm::utility::zero<ValueType>()}, belief.begin(), newObservation, previousBeliefs);
}

/*! Returns the size needed for a support bit vector, i.e. the number of POMDP states. */
template<typename ValueType>
uint64_t SparseBeliefState<ValueType>::getSupportSize() const {
    return manager->getNumberOfStates();
}

/*! Gives read access to the underlying state -> probability map. */
template<typename ValueType>
std::map<uint64_t, ValueType> const& SparseBeliefState<ValueType>::getBeliefMap() const {
    return belief;
}

/*! Sets the bit of every state in this belief's support; other bits are left untouched. */
template<typename ValueType>
void SparseBeliefState<ValueType>::setSupport(storm::storage::BitVector& support) const {
    for (auto const& entry : belief) {
        support.set(entry.first, true);
    }
}

/*!
 * Recursive worker for update(). Processes the support states one by one (nextStateIt) and,
 * for each partial belief built so far, branches over every choice (row) of the current state.
 * partialBeliefs[i] holds unnormalised successor mass restricted to newObservation and sums[i]
 * its total. Once all support states are consumed, each partial belief with non-zero mass is
 * normalised and inserted into previousBeliefs.
 */
template<typename ValueType>
void SparseBeliefState<ValueType>::updateHelper(std::vector<std::map<uint64_t, ValueType>> const& partialBeliefs, std::vector<ValueType> const& sums,
                                                typename std::map<uint64_t, ValueType>::const_iterator nextStateIt, uint32_t newObservation,
                                                std::unordered_set<SparseBeliefState<ValueType>>& previousBeliefs) const {
    if (nextStateIt == belief.end()) {
        for (uint64_t i = 0; i < partialBeliefs.size(); ++i) {
            auto const& partialBelief = partialBeliefs[i];
            auto const& sum = sums[i];
            if (storm::utility::isZero(sum)) {
                // No probability mass reaches newObservation under this combination of choices.
                continue;
            }
            std::size_t newHash = 0;
            ValueType risk = storm::utility::zero<ValueType>();
            std::map<uint64_t, ValueType> finalBelief;
            for (auto const& entry : partialBelief) {
                ValueType const normalised = entry.second / sum;
                finalBelief[entry.first] = normalised;
                // The hash only covers the support (state ids), not the probabilities.
                // boost::hash_combine(newHash, std::hash<ValueType>()(entry.second));
                boost::hash_combine(newHash, entry.first);
                risk += normalised * manager->getRisk(entry.first);
            }
            previousBeliefs.insert(SparseBeliefState<ValueType>(manager, finalBelief, newHash, risk, id));
        }
    } else {
        uint64_t const state = nextStateIt->first;
        auto newNextStateIt = nextStateIt;
        ++newNextStateIt;
        auto const& model = manager->getPomdp();
        auto const& choiceIndices = model.getNondeterministicChoiceIndices();
        std::vector<std::map<uint64_t, ValueType>> newPartialBeliefs;
        std::vector<ValueType> newSums;
        for (uint64_t i = 0; i < partialBeliefs.size(); ++i) {
            for (auto row = choiceIndices[state]; row < choiceIndices[state + 1]; ++row) {
                std::map<uint64_t, ValueType> newPartialBelief = partialBeliefs[i];
                ValueType newSum = sums[i];
                for (auto const& transition : model.getTransitionMatrix().getRow(row)) {
                    if (newObservation != model.getObservation(transition.getColumn())) {
                        continue;
                    }
                    ValueType const contribution = transition.getValue() * nextStateIt->second;
                    if (newPartialBelief.count(transition.getColumn()) == 0) {
                        newPartialBelief[transition.getColumn()] = contribution;
                    } else {
                        newPartialBelief[transition.getColumn()] += contribution;
                    }
                    newSum += contribution;
                }
                newPartialBeliefs.push_back(newPartialBelief);
                newSums.push_back(newSum);
            }
        }
        updateHelper(newPartialBeliefs, newSums, newNextStateIt, newObservation, previousBeliefs);
    }
}

/*!
 * Two dense beliefs are equal if their stored hashes and observations agree and all
 * entries are pairwise equal according to the comparator.
 */
template<typename ValueType>
bool operator==(ObservationDenseBeliefState<ValueType> const& lhs, ObservationDenseBeliefState<ValueType> const& rhs) {
    if (lhs.hash() != rhs.hash()) {
        return false;
    }
    if (lhs.observation != rhs.observation) {
        return false;
    }
    // NOTE(review): presumably (precision, relative) -- confirm against ConstantsComparator.
    storm::utility::ConstantsComparator<ValueType> cmp(0.00001, true);
    auto lhsIt = lhs.belief.begin();
    auto rhsIt = rhs.belief.begin();
    // NOTE(review): assumes equal observations imply equally sized belief vectors -- confirm.
    while (lhsIt != lhs.belief.end()) {
        if (!cmp.isEqual(*lhsIt, *rhsIt)) {
            return false;
        }
        ++lhsIt;
        ++rhsIt;
    }
    return true;
    // return std::equal(lhs.belief.begin(), lhs.belief.end(), rhs.belief.begin());
}

/*!
 * Creates the Dirac belief on the given state, stored densely over all states that share
 * the state's observation. Like the sparse counterpart, it takes a fresh id and reads the
 * state's risk from the manager, so that getRisk()/toString() never read unset members.
 */
template<typename ValueType>
ObservationDenseBeliefState<ValueType>::ObservationDenseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, uint64_t state)
    : manager(manager),
      belief(manager->numberOfStatesPerObservation(manager->getObservation(state))),
      observation(manager->getObservation(state)),
      prestoredhash(0),
      risk(manager->getRisk(state)),
      id(manager->getFreshId()),
      prevId(0) {
    belief[manager->getObservationOffset(state)] = storm::utility::one<ValueType>();
}

/*!
 * Creates a dense belief for the given observation from an already normalised vector
 * (indexed by observation offset) with precomputed hash and risk.
 */
template<typename ValueType>
ObservationDenseBeliefState<ValueType>::ObservationDenseBeliefState(std::shared_ptr<BeliefStateManager<ValueType>> const& manager, uint32_t observation,
                                                                    std::vector<ValueType> const& belief, std::size_t newHash, ValueType const& risk,
                                                                    uint64_t prevId)
    : manager(manager), belief(belief), observation(observation), prestoredhash(newHash), risk(risk), id(manager->getFreshId()), prevId(prevId) {
    // Intentionally left empty.
}

/*!
 * Inserts into previousBeliefs every successor belief that is consistent with newObservation.
 * Starts the recursion with a single empty partial belief whose probability mass is zero.
 */
template<typename ValueType>
void ObservationDenseBeliefState<ValueType>::update(uint32_t newObservation,
                                                    std::unordered_set<ObservationDenseBeliefState<ValueType>>& previousBeliefs) const {
    updateHelper({{}}, {storm::utility::zero<ValueType>()}, 0, newObservation, previousBeliefs);
}

/*!
 * Recursive worker for update(); same scheme as the sparse variant, but the current belief is
 * walked by offset (currentEntry) and zero entries are skipped. The resulting beliefs are dense
 * vectors over the states that carry newObservation.
 */
template<typename ValueType>
void ObservationDenseBeliefState<ValueType>::updateHelper(std::vector<std::map<uint64_t, ValueType>> const& partialBeliefs,
                                                          std::vector<ValueType> const& sums, uint64_t currentEntry, uint32_t newObservation,
                                                          std::unordered_set<ObservationDenseBeliefState<ValueType>>& previousBeliefs) const {
    while (currentEntry != belief.size() && storm::utility::isZero(belief[currentEntry])) {
        currentEntry++;
    }
    if (currentEntry == belief.size()) {
        for (uint64_t i = 0; i < partialBeliefs.size(); ++i) {
            auto const& partialBelief = partialBeliefs[i];
            auto const& sum = sums[i];
            if (storm::utility::isZero(sum)) {
                // No probability mass reaches newObservation under this combination of choices.
                continue;
            }
            std::size_t newHash = 0;
            ValueType risk = storm::utility::zero<ValueType>();
            // All keys of partialBelief are states with newObservation, and their offsets index
            // into that observation's bucket -- so the vector must be sized by newObservation,
            // not by the observation of the belief we are coming from.
            std::vector<ValueType> finalBelief(manager->numberOfStatesPerObservation(newObservation), storm::utility::zero<ValueType>());
            for (auto const& entry : partialBelief) {
                ValueType const normalised = entry.second / sum;
                finalBelief[manager->getObservationOffset(entry.first)] = normalised;
                // The hash only covers the support (state ids), not the probabilities.
                // boost::hash_combine(newHash, std::hash<ValueType>()(entry.second));
                boost::hash_combine(newHash, entry.first);
                risk += normalised * manager->getRisk(entry.first);
            }
            previousBeliefs.insert(ObservationDenseBeliefState<ValueType>(manager, newObservation, finalBelief, newHash, risk, id));
        }
    } else {
        uint64_t const state = manager->getState(observation, currentEntry);
        uint64_t const nextEntry = currentEntry + 1;
        auto const& model = manager->getPomdp();
        auto const& choiceIndices = model.getNondeterministicChoiceIndices();
        std::vector<std::map<uint64_t, ValueType>> newPartialBeliefs;
        std::vector<ValueType> newSums;
        for (uint64_t i = 0; i < partialBeliefs.size(); ++i) {
            for (auto row = choiceIndices[state]; row < choiceIndices[state + 1]; ++row) {
                std::map<uint64_t, ValueType> newPartialBelief = partialBeliefs[i];
                ValueType newSum = sums[i];
                for (auto const& transition : model.getTransitionMatrix().getRow(row)) {
                    if (newObservation != model.getObservation(transition.getColumn())) {
                        continue;
                    }
                    ValueType const contribution = transition.getValue() * belief[currentEntry];
                    if (newPartialBelief.count(transition.getColumn()) == 0) {
                        newPartialBelief[transition.getColumn()] = contribution;
                    } else {
                        newPartialBelief[transition.getColumn()] += contribution;
                    }
                    newSum += contribution;
                }
                newPartialBeliefs.push_back(newPartialBelief);
                newSums.push_back(newSum);
            }
        }
        updateHelper(newPartialBeliefs, newSums, nextEntry, newObservation, previousBeliefs);
    }
}

/*! Returns the hash that was stored at construction time. */
template<typename ValueType>
std::size_t ObservationDenseBeliefState<ValueType>::hash() const noexcept {
    return prestoredhash;
}

/*!
 * Returns the probability of the given state, which is zero for every state whose
 * observation differs from the observation of this belief.
 */
template<typename ValueType>
ValueType ObservationDenseBeliefState<ValueType>::get(uint64_t state) const {
    // Compare against this belief's observation (comparing against the state id was a bug).
    if (manager->getObservation(state) != observation) {
        return storm::utility::zero<ValueType>();
    }
    return belief[manager->getObservationOffset(state)];
}

/*! Returns the risk stored for this belief. */
template<typename ValueType>
ValueType ObservationDenseBeliefState<ValueType>::getRisk() const {
    return risk;
}

/*! Returns the length of the dense vector, i.e. the number of states with this belief's observation. */
template<typename ValueType>
uint64_t ObservationDenseBeliefState<ValueType>::getSupportSize() const {
    return belief.size();
}

/*!
 * Marks the non-zero entries of this belief in the given bit vector.
 * NOTE(review): indices here are observation offsets rather than state ids -- confirm callers expect that.
 */
template<typename ValueType>
void ObservationDenseBeliefState<ValueType>::setSupport(storm::storage::BitVector& support) const {
    storm::utility::vector::setNonzeroIndices(belief, support);
}

/*! Renders the non-zero entries as "id: <id>; s0 : p0, s1 : p1 (from <prevId>)", using state ids. */
template<typename ValueType>
std::string ObservationDenseBeliefState<ValueType>::toString() const {
    std::stringstream sstr;
    sstr << "id: " << id << "; ";
    bool first = true;
    uint64_t i = 0;
    for (auto const& beliefentry : belief) {
        if (!storm::utility::isZero(beliefentry)) {
            if (!first) {
                sstr << ", ";
            } else {
                first = false;
            }
            sstr << manager->getState(observation, i) << " : " << beliefentry;
        }
        i++;
    }
    sstr << " (from " << prevId << ")";
    return sstr.str();
}

/*! Creates a tracker for the given POMDP with an initially empty set of beliefs. */
template<typename ValueType, typename BeliefState>
NondeterministicBeliefTracker<ValueType, BeliefState>::NondeterministicBeliefTracker(
    storm::models::sparse::Pomdp<ValueType> const& pomdp, typename NondeterministicBeliefTracker<ValueType, BeliefState>::Options options)
    : pomdp(pomdp), manager(std::make_shared<BeliefStateManager<ValueType>>(pomdp)), beliefs(), options(options) {
    //
}

/*!
 * Adds a Dirac belief for every initial state that has the given observation and records
 * the observation as the last one seen.
 * NOTE(review): existing beliefs are not cleared here -- confirm that this is intended.
 * @return true iff at least one initial state has the given observation.
 */
template<typename ValueType, typename BeliefState>
bool NondeterministicBeliefTracker<ValueType, BeliefState>::reset(uint32_t observation) {
    bool hit = false;
    for (auto state : pomdp.getInitialStates()) {
        if (observation == pomdp.getObservation(state)) {
            hit = true;
            beliefs.emplace(manager, state);
        }
    }
    lastObservation = observation;
    return hit;
}

/*!
 * Replaces the current beliefs by all their successors under newObservation.
 * If options.trackTimeOut is positive and exceeded (checked after each belief), the method
 * returns false and leaves the current beliefs and the last observation unchanged.
 * @return true iff the update completed and at least one successor belief exists.
 * @throws storm::exceptions::InvalidOperationException if there is no current belief.
 */
template<typename ValueType, typename BeliefState>
bool NondeterministicBeliefTracker<ValueType, BeliefState>::track(uint64_t newObservation) {
    STORM_LOG_THROW(!beliefs.empty(), storm::exceptions::InvalidOperationException, "Cannot track without a belief (need to reset).");
    std::unordered_set<BeliefState> newBeliefs;
    storm::utility::Stopwatch trackTimer(true);
    for (auto const& belief : beliefs) {
        belief.update(newObservation, newBeliefs);
        if (options.trackTimeOut > 0 && trackTimer.getTimeInMilliseconds() > options.trackTimeOut) {
            return false;
        }
    }
    // newBeliefs is not used afterwards, so hand its contents over instead of copying the set.
    beliefs = std::move(newBeliefs);
    lastObservation = newObservation;
    return !beliefs.empty();
}

/*!
 * Returns the maximal (max == true) or minimal (max == false) risk over the current beliefs.
 * @throws storm::exceptions::InvalidOperationException if there is no current belief.
 */
template<typename ValueType, typename BeliefState>
ValueType NondeterministicBeliefTracker<ValueType, BeliefState>::getCurrentRisk(bool max) {
    STORM_LOG_THROW(!beliefs.empty(), storm::exceptions::InvalidOperationException, "Risk is only defined for beliefs (run reset() first).");
    ValueType result = beliefs.begin()->getRisk();
    if (max) {
        for (auto const& belief : beliefs) {
            if (belief.getRisk() > result) {
                result = belief.getRisk();
            }
        }
    } else {
        for (auto const& belief : beliefs) {
            if (belief.getRisk() < result) {
                result = belief.getRisk();
            }
        }
    }
    return result;
}

/*! Forwards the per-state risk values to the belief state manager. */
template<typename ValueType, typename BeliefState>
void NondeterministicBeliefTracker<ValueType, BeliefState>::setRisk(std::vector<ValueType> const& risk) {
    manager->setRiskPerState(risk);
}

/*! Gives read access to the current set of beliefs. */
template<typename ValueType, typename BeliefState>
std::unordered_set<BeliefState> const& NondeterministicBeliefTracker<ValueType, BeliefState>::getCurrentBeliefs() const {
    return beliefs;
}

/*! Returns the observation passed to the last successful reset()/track() call. */
template<typename ValueType, typename BeliefState>
uint32_t NondeterministicBeliefTracker<ValueType, BeliefState>::getCurrentObservation() const {
    return lastObservation;
}

/*! Returns the number of beliefs currently tracked. */
template<typename ValueType, typename BeliefState>
uint64_t NondeterministicBeliefTracker<ValueType, BeliefState>::getNumberOfBeliefs() const {
    return beliefs.size();
}

/*!
 * Returns the number of distinct indices that occur in the support of any current belief.
 * Returns 0 when no belief is tracked (previously this dereferenced beliefs.begin() on an empty set).
 */
template<typename ValueType, typename BeliefState>
uint64_t NondeterministicBeliefTracker<ValueType, BeliefState>::getCurrentDimension() const {
    if (beliefs.empty()) {
        return 0;
    }
    storm::storage::BitVector support(beliefs.begin()->getSupportSize());
    for (auto const& belief : beliefs) {
        belief.setSupport(support);
    }
    return support.getNumberOfSetBits();
}

/*!
 * Removes beliefs that the vertex-cloud reduction reports as eliminable and records whether
 * the reduction reported a timeout (see hasTimedOut()).
 * @return the number of removed beliefs.
 */
template<typename ValueType, typename BeliefState>
uint64_t NondeterministicBeliefTracker<ValueType, BeliefState>::reduce() {
    reductionTimedOut = false;
    // NOTE(review): the concrete factory types were not recoverable from the reviewed text;
    // SmtSolverFactory / Z3SmtSolverFactory are assumed -- confirm against ReduceVertexCloud's constructor.
    std::shared_ptr<storm::utility::solver::SmtSolverFactory> solverFactory = std::make_shared<storm::utility::solver::Z3SmtSolverFactory>();
    storm::storage::geometry::ReduceVertexCloud<ValueType> rvc(solverFactory, options.wiggle, options.timeOut);
    std::vector<std::map<uint64_t, ValueType>> points;
    std::vector<typename std::unordered_set<BeliefState>::iterator> iterators;
    for (auto it = beliefs.begin(); it != beliefs.end(); ++it) {
        // TODO get rid of the getBeliefMap function.
        points.push_back(it->getBeliefMap());
        iterators.push_back(it);
    }
    auto res = rvc.eliminate(points, pomdp.getNumberOfStates());
    // The complement of res.first marks the points to drop; res.second is treated as the timeout flag.
    storm::storage::BitVector eliminate = ~res.first;
    if (res.second) {
        reductionTimedOut = true;
    }

    auto selectedIterators = storm::utility::vector::filterVector(iterators, eliminate);
    // Erasing from an unordered_set only invalidates iterators to the erased elements,
    // so the remaining collected iterators stay usable.
    for (auto iter : selectedIterators) {
        beliefs.erase(iter);
    }
    return eliminate.getNumberOfSetBits();
}

/*! Returns whether the most recent reduce() call reported a timeout. */
template<typename ValueType, typename BeliefState>
bool NondeterministicBeliefTracker<ValueType, BeliefState>::hasTimedOut() const {
    return reductionTimedOut;
}

// Explicit instantiations.
// NOTE(review): the value types were not recoverable from the reviewed text; double and
// storm::RationalNumber are assumed for the two groups -- confirm.
template class SparseBeliefState<double>;
template bool operator==(SparseBeliefState<double> const&, SparseBeliefState<double> const&);
template class NondeterministicBeliefTracker<double, SparseBeliefState<double>>;
// template class ObservationDenseBeliefState<double>;
// template bool operator==(ObservationDenseBeliefState<double> const&, ObservationDenseBeliefState<double> const&);
// template class NondeterministicBeliefTracker<double, ObservationDenseBeliefState<double>>;

template class SparseBeliefState<storm::RationalNumber>;
template bool operator==(SparseBeliefState<storm::RationalNumber> const&, SparseBeliefState<storm::RationalNumber> const&);
template class NondeterministicBeliefTracker<storm::RationalNumber, SparseBeliefState<storm::RationalNumber>>;

}  // namespace generator
}  // namespace storm