Browse Source

Fixes for scoring observations

tempestpy_adaptions
Tim Quatmann 5 years ago
parent
commit
4ea452854f
  1. 15
      src/storm-pomdp/builder/BeliefMdpExplorer.h
  2. 12
      src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
  3. 11
      src/storm-pomdp/storage/BeliefManager.h

15
src/storm-pomdp/builder/BeliefMdpExplorer.h

@ -456,10 +456,14 @@ namespace storm {
return upperValueBounds[getCurrentMdpState()];
}
/// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for minimizing properties)
/// Or that the pomdpLowerValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid.
ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const {
return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds);
}
/// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for maximizing properties)
/// Or that the pomdpUpperValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid.
ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const {
return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
}
@ -507,7 +511,7 @@ namespace storm {
// Intentionally left empty.
}
void join(SuccessorObservationInformation other) {
void join(SuccessorObservationInformation other) { /// Does not join support (for performance reasons)
observationProbability += other.observationProbability;
maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs);
successorWithObsCount += other.successorWithObsCount;
@ -515,17 +519,19 @@ namespace storm {
ValueType observationProbability; /// The probability we move to the corresponding observation.
ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation.
uint64_t successorWithObsCount; /// The number of successors with this observation
uint64_t successorWithObsCount; /// The number of successor beliefstates with this observation
typename BeliefManagerType::BeliefSupportType support;
};
void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation> gatheredSuccessorObservations) {
void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) {
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior");
uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex;
gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
}
void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation> gatheredSuccessorObservations) {
void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) {
STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before");
for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) {
auto const& beliefId = getBeliefId(entry.getColumn());
@ -537,6 +543,7 @@ namespace storm {
// There already is an entry for this observation, so join the two informations
obsInsertion.first->second.join(info);
}
beliefManager->joinSupport(beliefId, obsInsertion.first->second.support);
}
}
}

12
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp

@ -283,9 +283,13 @@ namespace storm {
*/
template<typename PomdpModelType, typename BeliefValueType>
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) {
auto n = storm::utility::convertNumber<ValueType>(info.successorWithObsCount);
auto n = storm::utility::convertNumber<ValueType, uint64_t>(info.support.size());
auto one = storm::utility::one<ValueType>();
if (storm::utility::isOne(n)) {
// If the belief is Dirac, it has to be approximated precisely.
// In this case, we return the best possible rating
return one;
} else {
// Create the rating for this observation at this choice from the given info
ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
// At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1
@ -296,6 +300,7 @@ namespace storm {
obsChoiceRating *= storm::utility::convertNumber<ValueType>(observationResolution) / storm::utility::convertNumber<ValueType>(maxResolution);
return obsChoiceRating;
}
}
template<typename PomdpModelType, typename BeliefValueType>
std::vector<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<uint64_t> const& observationResolutionVector, uint64_t const& maxResolution) {
@ -487,6 +492,9 @@ namespace storm {
STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
// We did not explore this successor state. Get a bound on the "missing" value
truncationProbability += successor.second;
// Some care has to be taken here: Essentially, we are triangulating a value for the under-approximation out of other
// under-approximation values. In general, this does not yield a sound underapproximation anymore.
// However, in our case this is still the case as the under-approximation values are based on a memoryless scheduler.
truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first));
}
}

11
src/storm-pomdp/storage/BeliefManager.h

@ -3,6 +3,7 @@
#include <unordered_map>
#include <boost/optional.hpp>
#include <boost/container/flat_map.hpp>
#include <boost/container/flat_set.hpp>
#include "storm/adapters/RationalNumberAdapter.h"
#include "storm/utility/macros.h"
#include "storm/exceptions/UnexpectedException.h"
@ -16,6 +17,7 @@ namespace storm {
typedef typename PomdpType::ValueType ValueType;
typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType; // iterating over this shall be ordered (for correct hash computation)
typedef boost::container::flat_set<StateType> BeliefSupportType;
typedef uint64_t BeliefId;
BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
@ -99,7 +101,7 @@ namespace storm {
}
uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
auto belief = getBelief(beliefId);
auto const& belief = getBelief(beliefId);
return pomdp.getNumberOfChoices(belief.begin()->first);
}
@ -115,6 +117,13 @@ namespace storm {
}
}
void joinSupport(BeliefId const& beliefId, BeliefSupportType& support) {
auto const& belief = getBelief(beliefId);
for (auto const& entry : belief) {
support.insert(entry.first);
}
}
BeliefId getNumberOfBeliefIds() const {
return beliefs.size();
}

Loading…
Cancel
Save