Browse Source

Storing the observation resolutions as a float so that we can increase the resolution more accurately with a non-integer factor

tempestpy_adaptions
Tim Quatmann 5 years ago
parent
commit
55c4408c6a
  1. 41
      src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
  2. 7
      src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
  3. 33
      src/storm-pomdp/storage/BeliefManager.h

41
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp

@ -191,7 +191,7 @@ namespace storm {
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result) {
if (options.discretize) {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.resolutionInit);
std::vector<BeliefValueType> observationResolutionVector(pomdp.getNrObservations(), storm::utility::convertNumber<BeliefValueType>(options.resolutionInit));
auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
if (rewardModelName) {
manager->setRewardModel(rewardModelName);
@ -244,12 +244,12 @@ namespace storm {
statistics.refinementSteps = 0;
// Set up exploration data
std::vector<uint64_t> observationResolutionVector;
std::vector<BeliefValueType> observationResolutionVector;
std::shared_ptr<BeliefManagerType> overApproxBeliefManager;
std::shared_ptr<ExplorerType> overApproximation;
HeuristicParameters overApproxHeuristicPar;
if (options.discretize) { // Setup and build first OverApproximation
observationResolutionVector = std::vector<uint64_t>(pomdp.getNrObservations(), options.resolutionInit);
observationResolutionVector = std::vector<BeliefValueType>(pomdp.getNrObservations(), storm::utility::convertNumber<BeliefValueType>(options.resolutionInit));
overApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
if (rewardModelName) {
overApproxBeliefManager->setRewardModel(rewardModelName);
@ -404,32 +404,33 @@ namespace storm {
* Here, 0 means a bad approximation and 1 means a good approximation.
*/
template<typename PomdpModelType, typename BeliefValueType>
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) {
auto n = storm::utility::convertNumber<ValueType, uint64_t>(info.support.size());
auto one = storm::utility::one<ValueType>();
BeliefValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution) {
auto n = storm::utility::convertNumber<BeliefValueType, uint64_t>(info.support.size());
auto one = storm::utility::one<BeliefValueType>();
if (storm::utility::isOne(n)) {
// If the belief is Dirac, it has to be approximated precisely.
// In this case, we return the best possible rating
return one;
} else {
// Create the rating for this observation at this choice from the given info
ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability;
BeliefValueType obsChoiceRating = storm::utility::convertNumber<BeliefValueType, ValueType>(info.maxProbabilityToSuccessorWithObs / info.observationProbability);
// At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1
// Normalize the rating so that it ranges from 0 to 1, where
// 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
// Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
obsChoiceRating *= storm::utility::convertNumber<ValueType>(observationResolution) / storm::utility::convertNumber<ValueType>(maxResolution);
obsChoiceRating *= observationResolution / maxResolution;
return obsChoiceRating;
}
}
template<typename PomdpModelType, typename BeliefValueType>
std::vector<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<uint64_t> const& observationResolutionVector, uint64_t const& maxResolution) {
std::vector<BeliefValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<BeliefValueType> const& observationResolutionVector) {
uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates();
auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices();
BeliefValueType maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
std::vector<ValueType> resultingRatings(pomdp.getNrObservations(), storm::utility::one<ValueType>());
std::vector<BeliefValueType> resultingRatings(pomdp.getNrObservations(), storm::utility::one<BeliefValueType>());
std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) {
@ -444,7 +445,7 @@ namespace storm {
overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
for (auto const& obsInfo : gatheredSuccessorObservations) {
auto const& obs = obsInfo.first;
ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
BeliefValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
// The rating of the observation will be the minimum over all choice-based observation ratings
resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
@ -476,14 +477,11 @@ namespace storm {
}
template<typename PomdpModelType, typename BeliefValueType>
bool ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
bool ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<BeliefValueType>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
// Detect whether the refinement reached a fixpoint.
bool fixPoint = true;
// current maximal resolution (needed for refinement heuristic)
uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
statistics.overApproximationBuildTime.start();
storm::storage::BitVector refinedObservations;
if (!refine) {
@ -497,7 +495,8 @@ namespace storm {
// If we refine the existing overApproximation, our heuristic also wants to know which states are reachable under an optimal policy
overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true);
// We also need to find out which observation resolutions needs refinement.
auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution);
// current maximal resolution (needed for refinement heuristic)
auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector);
// If there is a score < 1, we have not reached a fixpoint, yet
if (std::any_of(obsRatings.begin(), obsRatings.end(), [](ValueType const& value){return value < storm::utility::one<ValueType>();})) {
fixPoint = false;
@ -506,17 +505,17 @@ namespace storm {
STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
for (auto const& obs : refinedObservations) {
// Increment the resolution at the refined observations.
// Detect overflows properly.
// Use storm's rational number to detect overflows properly.
storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber<storm::RationalNumber>(observationResolutionVector[obs]) * storm::utility::convertNumber<storm::RationalNumber>(options.resolutionFactor);
if (newObsResolutionAsRational > storm::utility::convertNumber<storm::RationalNumber>(std::numeric_limits<uint64_t>::max())) {
observationResolutionVector[obs] = std::numeric_limits<uint64_t>::max();
if (newObsResolutionAsRational > storm::utility::convertNumber<storm::RationalNumber, uint64_t>(std::numeric_limits<uint32_t>::max())) {
observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType, uint64_t>(std::numeric_limits<uint32_t>::max());
} else {
observationResolutionVector[obs] = storm::utility::convertNumber<uint64_t>(newObsResolutionAsRational);
observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(newObsResolutionAsRational);
}
}
overApproximation->restartExploration();
}
statistics.overApproximationMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
statistics.overApproximationMaxResolution = storm::utility::convertNumber<uint64_t>(storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end())));
// Start exploration
storm::utility::Stopwatch explorationTime;

7
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h

@ -49,7 +49,6 @@ namespace storm {
*
* @param targetObservations set of target observations
* @param min true if minimum value is to be computed
* @param observationResolutionVector vector containing the resolution to be used for each observation
* @param computeRewards true if rewards are to be computed, false if probability is computed
* @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
* @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
@ -79,7 +78,7 @@ namespace storm {
* Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties
* Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp)
*/
bool buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
bool buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<BeliefValueType>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
/**
* Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties
@ -87,9 +86,9 @@ namespace storm {
*/
bool buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution);
BeliefValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution);
std::vector<ValueType> getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<uint64_t> const& observationResolutionVector, uint64_t const& maxResolution);
std::vector<BeliefValueType> getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<BeliefValueType> const& observationResolutionVector);
struct Statistics {
Statistics();

33
src/storm-pomdp/storage/BeliefManager.h

@ -111,7 +111,7 @@ namespace storm {
return pomdp.getNumberOfChoices(belief.begin()->first);
}
Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) {
Triangulation triangulateBelief(BeliefId beliefId, BeliefValueType resolution) {
return triangulateBelief(getBelief(beliefId), resolution);
}
@ -134,7 +134,7 @@ namespace storm {
return beliefs.size();
}
std::vector<std::pair<BeliefId, ValueType>> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
std::vector<std::pair<BeliefId, ValueType>> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<BeliefValueType> const& observationResolutions) {
return expandInternal(beliefId, actionIndex, observationResolutions);
}
@ -293,10 +293,10 @@ namespace storm {
}
};
void triangulateBeliefFreudenthal(BeliefType const& belief, uint64_t const& resolution, Triangulation& result) {
void triangulateBeliefFreudenthal(BeliefType const& belief, BeliefValueType const& resolution, Triangulation& result) {
STORM_LOG_ASSERT(resolution != 0, "Invalid resolution: 0");
STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution");
StateType numEntries = belief.size();
auto convResolution = storm::utility::convertNumber<BeliefValueType>(resolution);
// This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
// Probabilities will be triangulated to values in 0/N, 1/N, 2/N, ..., N/N
// Variable names are mostly based on the paper
@ -307,12 +307,12 @@ namespace storm {
qsRow.reserve(numEntries);
std::vector<StateType> toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices
toOriginalIndicesMap.reserve(numEntries);
BeliefValueType x = convResolution;
BeliefValueType x = resolution;
for (auto const& entry : belief) {
qsRow.push_back(storm::utility::floor(x)); // v
sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v
toOriginalIndicesMap.push_back(entry.first);
x -= entry.second * convResolution;
x -= entry.second * resolution;
}
// Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler
qsRow.push_back(storm::utility::zero<BeliefValueType>());
@ -339,7 +339,7 @@ namespace storm {
for (StateType j = 0; j < numEntries; ++j) {
BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1];
if (!cc.isZero(gridPointEntry)) {
gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / convResolution;
gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / resolution;
}
}
result.gridPoints.push_back(getOrAddBeliefId(gridPoint));
@ -348,17 +348,17 @@ namespace storm {
}
}
void triangulateBeliefDynamic(BeliefType const& belief, uint64_t const& resolution, Triangulation& result) {
void triangulateBeliefDynamic(BeliefType const& belief, BeliefValueType const& resolution, Triangulation& result) {
// Find the best resolution for this belief, i.e., N such that the largest distance between one of the belief values to a value in {i/N | 0 i N} is minimal
uint64_t finalResolution = resolution;
STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution");
BeliefValueType finalResolution = resolution;
uint64_t finalResolutionMisses = belief.size() + 1;
// We don't need to check resolutions that are smaller than the maximal resolution divided by 2 (as we already checked multiples of these)
for (uint64_t currResolution = resolution; currResolution > resolution / 2; --currResolution) {
for (BeliefValueType currResolution = resolution; currResolution > resolution / 2; --currResolution) {
uint64_t currResMisses = 0;
BeliefValueType currResolutionConverted = storm::utility::convertNumber<BeliefValueType>(currResolution);
bool continueWithNextResolution = false;
for (auto const& belEntry : belief) {
BeliefValueType product = belEntry.second * currResolutionConverted;
BeliefValueType product = belEntry.second * currResolution;
if (!cc.isZero(product - storm::utility::round(product))) {
++currResMisses;
if (currResMisses >= finalResolutionMisses) {
@ -384,7 +384,7 @@ namespace storm {
triangulateBeliefFreudenthal(belief, finalResolution, result);
}
Triangulation triangulateBelief(BeliefType const& belief, uint64_t const& resolution) {
Triangulation triangulateBelief(BeliefType const& belief, BeliefValueType const& resolution) {
STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid.");
Triangulation result;
// Quickly triangulate Dirac beliefs
@ -392,12 +392,13 @@ namespace storm {
result.weights.push_back(storm::utility::one<BeliefValueType>());
result.gridPoints.push_back(getOrAddBeliefId(belief));
} else {
auto ceiledResolution = storm::utility::ceil<BeliefValueType>(resolution);
switch (triangulationMode) {
case TriangulationMode::Static:
triangulateBeliefFreudenthal(belief, resolution, result);
triangulateBeliefFreudenthal(belief, ceiledResolution, result);
break;
case TriangulationMode::Dynamic:
triangulateBeliefDynamic(belief, resolution, result);
triangulateBeliefDynamic(belief, ceiledResolution, result);
break;
default:
STORM_LOG_ASSERT(false, "Invalid triangulation mode.");
@ -407,7 +408,7 @@ namespace storm {
return result;
}
std::vector<std::pair<BeliefId, ValueType>> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
std::vector<std::pair<BeliefId, ValueType>> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<BeliefValueType>> const& observationTriangulationResolutions = boost::none) {
std::vector<std::pair<BeliefId, ValueType>> destinations;
BeliefType belief = getBelief(beliefId);

Loading…
Cancel
Save