Browse Source

Scaling the rating of an observation with the current resolution.

tempestpy_adaptions
Tim Quatmann 5 years ago
parent
commit
c3847d05af
  1. 16
      src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
  2. 4
      src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h

16
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp

@ -264,21 +264,23 @@ namespace storm {
* Here, 0 means a bad approximation and 1 means a good approximation. * Here, 0 means a bad approximation and 1 means a good approximation.
*/ */
template<typename PomdpModelType, typename BeliefValueType> template<typename PomdpModelType, typename BeliefValueType>
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info) {
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) {
auto n = storm::utility::convertNumber<ValueType>(info.successorWithObsCount); auto n = storm::utility::convertNumber<ValueType>(info.successorWithObsCount);
auto one = storm::utility::one<ValueType>(); auto one = storm::utility::one<ValueType>();
// Create the actual rating for this observation at this choice from the given info
// Create the rating for this observation at this choice from the given info
ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
// At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1
// Normalize the rating so that it ranges from 0 to 1, where // Normalize the rating so that it ranges from 0 to 1, where
// 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
obsChoiceRating = (obsChoiceRating * n - one) / (n - one); obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
// Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
obsChoiceRating *= storm::utility::convertNumber<ValueType>(observationResolution) / storm::utility::convertNumber<ValueType>(maxResolution);
return obsChoiceRating; return obsChoiceRating;
} }
template<typename PomdpModelType, typename BeliefValueType> template<typename PomdpModelType, typename BeliefValueType>
std::vector<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation) {
std::vector<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<uint64_t> const& observationResolutionVector, uint64_t const& maxResolution) {
uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices(); uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices();
std::vector<ValueType> resultingRatings(pomdp.getNrObservations(), storm::utility::one<ValueType>()); std::vector<ValueType> resultingRatings(pomdp.getNrObservations(), storm::utility::one<ValueType>());
@ -289,7 +291,7 @@ namespace storm {
overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
for (auto const& obsInfo : gatheredSuccessorObservations) { for (auto const& obsInfo : gatheredSuccessorObservations) {
auto const& obs = obsInfo.first; auto const& obs = obsInfo.first;
ValueType obsChoiceRating = rateObservation(obsInfo.second);
ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
// The rating of the observation will be the minimum over all choice-based observation ratings // The rating of the observation will be the minimum over all choice-based observation ratings
resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating); resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
@ -303,6 +305,8 @@ namespace storm {
STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given"); STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given");
STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero<ValueType>(), "Can not refine with negative aggressiveness."); STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero<ValueType>(), "Can not refine with negative aggressiveness.");
STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one<ValueType>(), "Refinement with aggressiveness > 1 is invalid."); STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one<ValueType>(), "Refinement with aggressiveness > 1 is invalid.");
uint64_t maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
STORM_LOG_INFO("Refining with maximal resolution " << maxResolution << ".");
statistics.overApproximationBuildTime.start(); statistics.overApproximationBuildTime.start();
storm::storage::BitVector refinedObservations; storm::storage::BitVector refinedObservations;
@ -315,7 +319,7 @@ namespace storm {
} }
} else { } else {
// If we refine the existing overApproximation, we need to find out which observation resolutions need refinement. // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement.
auto obsRatings = getObservationRatings(overApproximation);
auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, maxResolution);
ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end()); ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end());
// Potentially increase the aggressiveness so that at least one observation actually gets refinement. // Potentially increase the aggressiveness so that at least one observation actually gets refinement.
*refinementAggressiveness = std::max(minRating, *refinementAggressiveness); *refinementAggressiveness = std::max(minRating, *refinementAggressiveness);
@ -356,7 +360,7 @@ namespace storm {
overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations); overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations);
for (auto const& obsInfo : gatheredSuccessorObservations) { for (auto const& obsInfo : gatheredSuccessorObservations) {
if (refinedObservations.get(obsInfo.first)) { if (refinedObservations.get(obsInfo.first)) {
ValueType obsRating = rateObservation(obsInfo.second);
ValueType obsRating = rateObservation(obsInfo.second, observationResolutionVector[obsInfo.first], maxResolution);
stateActionRating = std::min(stateActionRating, obsRating); stateActionRating = std::min(stateActionRating, obsRating);
} }
} }

4
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h

@ -84,9 +84,9 @@ namespace storm {
*/ */
void buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation); void buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info);
ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution);
std::vector<ValueType> getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation);
std::vector<ValueType> getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<uint64_t> const& observationResolutionVector, uint64_t const& maxResolution);
struct Statistics { struct Statistics {
Statistics(); Statistics();

Loading…
Cancel
Save