Browse Source

Fixes for scoring observations

tempestpy_adaptions
Tim Quatmann 5 years ago
parent
commit
4ea452854f
  1. 17
      src/storm-pomdp/builder/BeliefMdpExplorer.h
  2. 30
      src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
  3. 11
      src/storm-pomdp/storage/BeliefManager.h

17
src/storm-pomdp/builder/BeliefMdpExplorer.h

@ -456,10 +456,14 @@ namespace storm {
return upperValueBounds[getCurrentMdpState()]; return upperValueBounds[getCurrentMdpState()];
} }
/// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for minimizing properties)
/// Or that the pomdpLowerValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid.
ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const { ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const {
return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds); return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds);
} }
/// This requires that we either over-approximate the scheduler behavior in this direction (e.g. grid approximation for maximizing properties)
/// Or that the pomdpUpperValueBounds are based on a memoryless scheduler. Otherwise, such a triangulation would not be valid.
ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const { ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const {
return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds); return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds);
} }
@ -507,7 +511,7 @@ namespace storm {
// Intentionally left empty. // Intentionally left empty.
} }
void join(SuccessorObservationInformation other) {
void join(SuccessorObservationInformation other) { /// Does not join support (for performance reasons)
observationProbability += other.observationProbability; observationProbability += other.observationProbability;
maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs); maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs);
successorWithObsCount += other.successorWithObsCount; successorWithObsCount += other.successorWithObsCount;
@ -515,17 +519,19 @@ namespace storm {
ValueType observationProbability; /// The probability we move to the corresponding observation. ValueType observationProbability; /// The probability we move to the corresponding observation.
ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation. ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation.
uint64_t successorWithObsCount; /// The number of successors with this observation
uint64_t successorWithObsCount; /// The number of successor beliefstates with this observation
typename BeliefManagerType::BeliefSupportType support;
}; };
void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation> gatheredSuccessorObservations) {
void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) {
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior");
uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex;
gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
} }
void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation> gatheredSuccessorObservations) {
void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) {
STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before"); STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before");
for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) {
auto const& beliefId = getBeliefId(entry.getColumn()); auto const& beliefId = getBeliefId(entry.getColumn());
@ -537,6 +543,7 @@ namespace storm {
// There already is an entry for this observation, so join the two informations // There already is an entry for this observation, so join the two informations
obsInsertion.first->second.join(info); obsInsertion.first->second.join(info);
} }
beliefManager->joinSupport(beliefId, obsInsertion.first->second.support);
} }
} }
} }

30
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp

@ -283,18 +283,23 @@ namespace storm {
*/ */
template<typename PomdpModelType, typename BeliefValueType> template<typename PomdpModelType, typename BeliefValueType>
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) { typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) {
auto n = storm::utility::convertNumber<ValueType>(info.successorWithObsCount);
auto n = storm::utility::convertNumber<ValueType, uint64_t>(info.support.size());
auto one = storm::utility::one<ValueType>(); auto one = storm::utility::one<ValueType>();
// Create the rating for this observation at this choice from the given info
ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
// At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1
// Normalize the rating so that it ranges from 0 to 1, where
// 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
// Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
obsChoiceRating *= storm::utility::convertNumber<ValueType>(observationResolution) / storm::utility::convertNumber<ValueType>(maxResolution);
return obsChoiceRating;
if (storm::utility::isOne(n)) {
// If the belief is Dirac, it has to be approximated precisely.
// In this case, we return the best possible rating
return one;
} else {
// Create the rating for this observation at this choice from the given info
ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
// At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1
// Normalize the rating so that it ranges from 0 to 1, where
// 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
// Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
obsChoiceRating *= storm::utility::convertNumber<ValueType>(observationResolution) / storm::utility::convertNumber<ValueType>(maxResolution);
return obsChoiceRating;
}
} }
template<typename PomdpModelType, typename BeliefValueType> template<typename PomdpModelType, typename BeliefValueType>
@ -487,6 +492,9 @@ namespace storm {
STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
// We did not explore this successor state. Get a bound on the "missing" value // We did not explore this successor state. Get a bound on the "missing" value
truncationProbability += successor.second; truncationProbability += successor.second;
// Some care has to be taken here: Essentially, we are triangulating a value for the under-approximation out of other
// under-approximation values. In general, this does not yield a sound underapproximation anymore.
// However, in our case this is still the case as the under-approximation values are based on a memoryless scheduler.
truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first)); truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first));
} }
} }

11
src/storm-pomdp/storage/BeliefManager.h

@ -3,6 +3,7 @@
#include <unordered_map> #include <unordered_map>
#include <boost/optional.hpp> #include <boost/optional.hpp>
#include <boost/container/flat_map.hpp> #include <boost/container/flat_map.hpp>
#include <boost/container/flat_set.hpp>
#include "storm/adapters/RationalNumberAdapter.h" #include "storm/adapters/RationalNumberAdapter.h"
#include "storm/utility/macros.h" #include "storm/utility/macros.h"
#include "storm/exceptions/UnexpectedException.h" #include "storm/exceptions/UnexpectedException.h"
@ -16,6 +17,7 @@ namespace storm {
typedef typename PomdpType::ValueType ValueType; typedef typename PomdpType::ValueType ValueType;
typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType; // iterating over this shall be ordered (for correct hash computation) typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType; // iterating over this shall be ordered (for correct hash computation)
typedef boost::container::flat_set<StateType> BeliefSupportType;
typedef uint64_t BeliefId; typedef uint64_t BeliefId;
BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
@ -99,7 +101,7 @@ namespace storm {
} }
uint64_t getBeliefNumberOfChoices(BeliefId beliefId) { uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
auto belief = getBelief(beliefId);
auto const& belief = getBelief(beliefId);
return pomdp.getNumberOfChoices(belief.begin()->first); return pomdp.getNumberOfChoices(belief.begin()->first);
} }
@ -115,6 +117,13 @@ namespace storm {
} }
} }
void joinSupport(BeliefId const& beliefId, BeliefSupportType& support) {
auto const& belief = getBelief(beliefId);
for (auto const& entry : belief) {
support.insert(entry.first);
}
}
BeliefId getNumberOfBeliefIds() const { BeliefId getNumberOfBeliefIds() const {
return beliefs.size(); return beliefs.size();
} }

Loading…
Cancel
Save