
Started to integrate new refinement options.

Branch: tempestpy_adaptions
Tim Quatmann, 5 years ago
commit 37490a8eca
  1. src/storm-pomdp-cli/storm-pomdp.cpp (17 changes)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (214 changes)
  3. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (35 changes)

src/storm-pomdp-cli/storm-pomdp.cpp (17 changes)

@@ -103,14 +103,17 @@ namespace storm {
     STORM_PRINT_AND_LOG("Applying grid approximation... ");
     auto const& gridSettings = storm::settings::getModule<storm::settings::modules::GridApproximationSettings>();
     typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<ValueType>>::Options options;
-    options.initialGridResolution = gridSettings.getGridResolution();
-    options.explorationThreshold = storm::utility::convertNumber<ValueType>(gridSettings.getExplorationThreshold());
-    options.doRefinement = gridSettings.isRefineSet();
-    options.refinementPrecision = storm::utility::convertNumber<ValueType>(gridSettings.getRefinementPrecision());
-    options.numericPrecision = storm::utility::convertNumber<ValueType>(gridSettings.getNumericPrecision());
-    options.cacheSubsimplices = gridSettings.isCacheSimplicesSet();
+    std::cout << "TODO: create and read from new settings!" << std::endl;
+    // options.initialGridResolution = gridSettings.getGridResolution();
+    // options.explorationThreshold = storm::utility::convertNumber<ValueType>(gridSettings.getExplorationThreshold());
+    options.refine = gridSettings.isRefineSet();
+    options.unfold = true;
+    options.discretize = true;
+    // options.refinementPrecision = storm::utility::convertNumber<ValueType>(gridSettings.getRefinementPrecision());
+    // options.numericPrecision = storm::utility::convertNumber<ValueType>(gridSettings.getNumericPrecision());
+    // options.cacheSubsimplices = gridSettings.isCacheSimplicesSet();
     if (gridSettings.isUnfoldBeliefMdpSizeThresholdSet()) {
-        options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold();
+        //options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold();
     }
     if (storm::NumberTraits<ValueType>::IsExact) {
         if (gridSettings.isNumericPrecisionSetFromDefault()) {
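The TODO above marks where the CLI is supposed to populate the new options from a dedicated settings module once it exists. A minimal sketch of that wiring, assuming hypothetical getters (isDiscretizeSet, getResolutionInit, ...) that this commit does not introduce:

    // Hypothetical wiring once the new settings module exists; every getter
    // name below is an assumption, not part of this commit or of GridApproximationSettings.
    options.discretize = newSettings.isDiscretizeSet();
    options.unfold = newSettings.isUnfoldSet();
    options.refineStepLimit = newSettings.getRefineStepLimit();
    options.resolutionInit = newSettings.getResolutionInit();
    options.gapThresholdInit = storm::utility::convertNumber<ValueType>(newSettings.getGapThresholdInit());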

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (214 changes)

@@ -29,13 +29,22 @@ namespace storm {
 namespace modelchecker {
     template<typename PomdpModelType, typename BeliefValueType>
    ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Options::Options() {
-        initialGridResolution = 10;
-        explorationThreshold = storm::utility::zero<ValueType>();
-        doRefinement = true;
-        refinementPrecision = storm::utility::convertNumber<ValueType>(1e-4);
+        discretize = false;
+        unfold = false;
+        refine = false;
+        resolutionInit = 2;
+        resolutionFactor = storm::utility::convertNumber<ValueType,uint64_t>(2);
+        sizeThresholdInit = 0; // automatic
+        sizeThresholdFactor = 4;
+        gapThresholdInit = storm::utility::convertNumber<ValueType>(0.1);
+        gapThresholdFactor = storm::utility::convertNumber<ValueType>(0.25);
+        optimalChoiceValueThresholdInit = storm::utility::convertNumber<ValueType>(1e-3);
+        optimalChoiceValueThresholdFactor = storm::utility::one<ValueType>();
+        obsThresholdInit = storm::utility::convertNumber<ValueType>(0.1);
+        obsThresholdIncrementFactor = storm::utility::convertNumber<ValueType>(0.1);
         numericPrecision = storm::NumberTraits<ValueType>::IsExact ? storm::utility::zero<ValueType>() : storm::utility::convertNumber<ValueType>(1e-9);
-        cacheSubsimplices = false;
-        beliefMdpSizeThreshold = boost::none;
     }

     template<typename PomdpModelType, typename BeliefValueType>
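Each heuristic now comes as an (init, factor) pair: init seeds the first approximation, and the factor rescales the parameter on every refinement step (a factor of 1, as for optimalChoiceValueThresholdFactor, keeps it constant). A self-contained sketch of the schedules these defaults imply, with plain doubles standing in for ValueType:

    #include <cstdint>

    // Illustrative only: the parameter schedules implied by the defaults above.
    void thresholdSchedules(unsigned steps) {
        double gap = 0.1;            // gapThresholdInit
        double obs = 0.1;            // obsThresholdInit
        std::uint64_t size = 1000;   // stand-in for the size of the last explored MDP
        for (unsigned step = 0; step < steps; ++step) {
            gap *= 0.25;               // gapThresholdFactor: 0.1, 0.025, 0.00625, ...
            size *= 4;                 // sizeThresholdFactor: 4x more states allowed per step
            obs += 0.1 * (1.0 - obs);  // obsThresholdIncrementFactor: approaches 1
        }
    }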
@@ -69,6 +78,7 @@ namespace storm {
 template<typename PomdpModelType, typename BeliefValueType>
 typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::check(storm::logic::Formula const& formula) {
+    STORM_LOG_ASSERT(options.unfold || options.discretize, "Invoked belief exploration but no task (unfold or discretize) given.");
     // Reset all collected statistics
     statistics = Statistics();
     // Extract the relevant information from the formula
@@ -96,7 +106,7 @@ namespace storm {
         STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
     }
-    if (options.doRefinement) {
+    if (options.refine) {
         refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
     } else {
         computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
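The three flags span the checker's modes: discretize and unfold choose which approximations are built, refine chooses between the one-shot and the iterative driver. The combinations this dispatch supports, as I read the diff:

    // Mode combinations implied by the assert in check() and the dispatch above.
    struct Modes { bool discretize, unfold, refine; };
    constexpr Modes bothBoundsOnce  {true,  true,  false}; // computeReachabilityOTF: both bounds, one shot
    constexpr Modes iterativeRefine {true,  true,  true};  // refineReachability: tighten both bounds in a loop
    constexpr Modes unfoldOnly      {false, true,  false}; // under-approximation only
    constexpr Modes discretizeOnly  {true,  false, false}; // over-approximation only
    // discretize == unfold == false is rejected by the STORM_LOG_ASSERT in check().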
@@ -126,7 +136,7 @@ namespace storm {
     // The overapproximation MDP:
     if (statistics.overApproximationStates) {
         stream << "# Number of states in the ";
-        if (options.doRefinement) {
+        if (options.refine) {
             stream << "final ";
         }
         stream << "grid MDP for the over-approximation: ";
@@ -142,7 +152,7 @@ namespace storm {
     // The underapproximation MDP:
     if (statistics.underApproximationStates) {
         stream << "# Number of states in the ";
-        if (options.doRefinement) {
+        if (options.refine) {
             stream << "final ";
         }
         stream << "grid MDP for the under-approximation: ";
@@ -158,28 +168,21 @@ namespace storm {
     stream << "##########################################" << std::endl;
 }

 template<typename PomdpModelType, typename BeliefValueType>
 void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
-    if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
-        STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
-    }
-    uint64_t underApproxSizeThreshold = 0;
-    { // Overapproximation
-        std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
+    if (options.discretize) {
+        std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.resolutionInit);
         auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
         if (rewardModelName) {
             manager->setRewardModel(rewardModelName);
         }
         auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
         HeuristicParameters heuristicParameters;
-        heuristicParameters.gapThreshold = storm::utility::convertNumber<ValueType>(options.explorationThreshold);
-        heuristicParameters.observationThreshold = storm::utility::zero<ValueType>(); // Not relevant without refinement
-        heuristicParameters.sizeThreshold = std::numeric_limits<uint64_t>::max();
-        heuristicParameters.optimalChoiceValueEpsilon = storm::utility::convertNumber<ValueType>(1e-4);
+        heuristicParameters.gapThreshold = options.gapThresholdInit;
+        heuristicParameters.observationThreshold = options.obsThresholdInit; // Actually not relevant without refinement
+        heuristicParameters.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit;
+        heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
         buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, observationResolutionVector, manager, approx);
         if (approx->hasComputedValues()) {
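In this one-shot over-approximation, a sizeThresholdInit of 0 ("automatic") simply means unbounded exploration. A sketch of the convention:

    #include <cstdint>
    #include <limits>

    // 0 is the "automatic" marker; without refinement it decodes to "no state limit".
    std::uint64_t effectiveSizeThreshold(std::uint64_t sizeThresholdInit) {
        return sizeThresholdInit == 0 ? std::numeric_limits<std::uint64_t>::max() : sizeThresholdInit;
    }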
@@ -187,22 +190,23 @@ namespace storm {
             approx->getExploredMdp()->printModelInformationToStream(std::cout);
             ValueType& resultValue = min ? result.lowerBound : result.upperBound;
             resultValue = approx->getComputedValueAtInitialState();
-            underApproxSizeThreshold = std::max(approx->getExploredMdp()->getNumberOfStates(), underApproxSizeThreshold);
         }
     }
-    { // Underapproximation (Uses a fresh Belief manager)
+    if (options.unfold) { // Underapproximation (uses a fresh Belief manager)
         auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
         if (rewardModelName) {
             manager->setRewardModel(rewardModelName);
         }
         auto approx = std::make_shared<ExplorerType>(manager, lowerPomdpValueBounds, upperPomdpValueBounds);
-        if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0) {
-            underApproxSizeThreshold = options.beliefMdpSizeThreshold.get();
-        }
-        if (underApproxSizeThreshold == 0) {
-            underApproxSizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); // Heuristically select this (only relevant if the over-approx could not be build)
+        HeuristicParameters heuristicParameters;
+        heuristicParameters.gapThreshold = options.gapThresholdInit;
+        heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
+        heuristicParameters.sizeThreshold = options.sizeThresholdInit;
+        if (heuristicParameters.sizeThreshold == 0) {
+            // Select a decent value automatically
+            heuristicParameters.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation();
         }
-        buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx);
+        buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, manager, approx);
         if (approx->hasComputedValues()) {
             STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n");
             approx->getExploredMdp()->printModelInformationToStream(std::cout);
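For the under-approximation, the automatic size threshold is the number of POMDP states times the largest number of states sharing one observation. A worked example with assumed numbers (100 states, at most 4 per observation class):

    #include <cstdint>

    // Automatic budget for the unfolded belief MDP, as selected above.
    std::uint64_t automaticSizeThreshold(std::uint64_t numPomdpStates, std::uint64_t maxStatesWithSameObservation) {
        // e.g. 100 states with at most 4 states per observation class -> 400 belief states
        return numPomdpStates * maxStatesWithSameObservation;
    }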
@@ -215,76 +219,91 @@ namespace storm {

 template<typename PomdpModelType, typename BeliefValueType>
 void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
+    ValueType& overApproxValue = min ? result.lowerBound : result.upperBound;
+    ValueType& underApproxValue = min ? result.upperBound : result.lowerBound;
     // Set up exploration data
-    std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
-    auto overApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
-    auto underApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
-    if (rewardModelName) {
-        overApproxBeliefManager->setRewardModel(rewardModelName);
-        underApproxBeliefManager->setRewardModel(rewardModelName);
-    }
-    // OverApproximaion
-    auto overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
-    HeuristicParameters heuristicParameters;
-    heuristicParameters.gapThreshold = storm::utility::convertNumber<ValueType>(options.explorationThreshold);
-    heuristicParameters.observationThreshold = storm::utility::zero<ValueType>(); // Will be set to lowest observation score automatically
-    heuristicParameters.sizeThreshold = std::numeric_limits<uint64_t>::max();
-    heuristicParameters.optimalChoiceValueEpsilon = storm::utility::convertNumber<ValueType>(1e-4);
-    buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, observationResolutionVector, overApproxBeliefManager, overApproximation);
-    if (!overApproximation->hasComputedValues()) {
-        return;
+    std::vector<uint64_t> observationResolutionVector;
+    std::shared_ptr<BeliefManagerType> overApproxBeliefManager;
+    std::shared_ptr<ExplorerType> overApproximation;
+    HeuristicParameters overApproxHeuristicPar;
+    if (options.discretize) { // Setup and build first OverApproximation
+        observationResolutionVector = std::vector<uint64_t>(pomdp.getNrObservations(), options.resolutionInit);
+        overApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
+        if (rewardModelName) {
+            overApproxBeliefManager->setRewardModel(rewardModelName);
+        }
+        overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+        overApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
+        overApproxHeuristicPar.observationThreshold = options.obsThresholdInit;
+        overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;
+        overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
+        buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation);
+        if (!overApproximation->hasComputedValues()) {
+            return;
+        }
+        overApproxValue = overApproximation->getComputedValueAtInitialState();
     }
-    ValueType& overApproxValue = min ? result.lowerBound : result.upperBound;
-    overApproxValue = overApproximation->getComputedValueAtInitialState();

-    // UnderApproximation
-    uint64_t underApproxSizeThreshold;
-    if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0ull) {
-        underApproxSizeThreshold = options.beliefMdpSizeThreshold.get();
-    } else {
-        underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates();
-    }
-    auto underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
-    buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
-    if (!underApproximation->hasComputedValues()) {
-        return;
+    std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
+    std::shared_ptr<ExplorerType> underApproximation;
+    HeuristicParameters underApproxHeuristicPar;
+    if (options.unfold) { // Setup and build first UnderApproximation
+        underApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
+        if (rewardModelName) {
+            underApproxBeliefManager->setRewardModel(rewardModelName);
+        }
+        underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+        underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
+        underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
+        underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;
+        if (underApproxHeuristicPar.sizeThreshold == 0) {
+            // Select a decent value automatically
+            underApproxHeuristicPar.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation();
+        }
+        buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, underApproxHeuristicPar, underApproxBeliefManager, underApproximation);
+        if (!underApproximation->hasComputedValues()) {
+            return;
+        }
+        underApproxValue = underApproximation->getComputedValueAtInitialState();
     }
-    ValueType& underApproxValue = min ? result.upperBound : result.lowerBound;
-    underApproxValue = underApproximation->getComputedValueAtInitialState();
-    // ValueType lastMinScore = storm::utility::infinity<ValueType>();

     // Start refinement
     statistics.refinementSteps = 0;
+    STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || options.refinePrecision.is_initialized(), "No termination criterion for refinement given. Consider to specify a steplimit, precisionlimit or timeout");
+    STORM_LOG_WARN_COND(!options.refinePrecision.is_initialized() || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined.");
-    while (result.diff() > options.refinementPrecision) {
+    while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps < options.refineStepLimit.get()) && (!options.refinePrecision.is_initialized() || result.diff() > options.refinePrecision.get())) {
         if (storm::utility::resources::isTerminate()) {
             break;
         }
         ++statistics.refinementSteps.get();
-        STORM_LOG_INFO("Starting refinement step " << statistics.refinementSteps.get() << ". Current difference between lower and upper bound is " << result.diff() << ".");
+        STORM_PRINT_AND_LOG("Starting refinement step " << statistics.refinementSteps.get() << ". Current difference between lower and upper bound is " << result.diff() << "." << std::endl);
-        // Refine over-approximation
-        if (min) {
-            overApproximation->takeCurrentValuesAsLowerBounds();
-        } else {
-            overApproximation->takeCurrentValuesAsUpperBounds();
-        }
-        heuristicParameters.gapThreshold /= storm::utility::convertNumber<ValueType, uint64_t>(4);
-        heuristicParameters.sizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates() * 4;
-        heuristicParameters.observationThreshold += storm::utility::convertNumber<ValueType>(0.1) * (storm::utility::one<ValueType>() - heuristicParameters.observationThreshold);
-        buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, heuristicParameters, observationResolutionVector, overApproxBeliefManager, overApproximation);
-        if (overApproximation->hasComputedValues()) {
-            overApproxValue = overApproximation->getComputedValueAtInitialState();
-        } else {
-            break;
+        if (options.discretize) {
+            // Refine over-approximation
+            if (min) {
+                overApproximation->takeCurrentValuesAsLowerBounds();
+            } else {
+                overApproximation->takeCurrentValuesAsUpperBounds();
+            }
+            overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
+            overApproxHeuristicPar.sizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates() * options.sizeThresholdFactor;
+            overApproxHeuristicPar.observationThreshold += options.obsThresholdIncrementFactor * (storm::utility::one<ValueType>() - overApproxHeuristicPar.observationThreshold);
+            overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
+            buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation);
+            if (overApproximation->hasComputedValues()) {
+                overApproxValue = overApproximation->getComputedValueAtInitialState();
+            } else {
+                break;
+            }
         }
-        if (result.diff() > options.refinementPrecision) {
+        if (options.unfold && (!options.refinePrecision.is_initialized() || result.diff() > options.refinePrecision.get())) {
             // Refine under-approximation
-            underApproxSizeThreshold *= 4;
-            underApproxSizeThreshold = std::max<uint64_t>(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates());
-            STORM_LOG_DEBUG("Refining under-approximation with size threshold " << underApproxSizeThreshold << ".");
-            buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation);
+            underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
+            underApproxHeuristicPar.sizeThreshold = underApproximation->getExploredMdp()->getNumberOfStates() * options.sizeThresholdFactor;
+            underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
+            buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation);
             if (underApproximation->hasComputedValues()) {
                 underApproxValue = underApproximation->getComputedValueAtInitialState();
             } else {
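The old stopping rule (diff below refinementPrecision) is replaced by two optional criteria; if neither is set, the loop only stops on a timeout or external termination, which is what the first warning flags. The combined test, extracted into a self-contained sketch (std::optional replaces the commit's boost::optional to keep it standalone):

    #include <cstdint>
    #include <optional>

    // Sketch of the new loop condition in refineReachability.
    bool continueRefinement(std::optional<std::uint64_t> stepLimit, std::optional<double> precision,
                            std::uint64_t stepsDone, double currentDiff) {
        bool belowStepLimit = !stepLimit.has_value() || stepsDone < *stepLimit;
        bool abovePrecision = !precision.has_value() || currentDiff > *precision;
        return belowStepLimit && abovePrecision; // nothing set -> refine until timeout/terminate
    }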
@@ -352,7 +371,7 @@ namespace storm {
 }

 template<typename PomdpModelType, typename BeliefValueType>
-void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
+void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
     // current maximal resolution (needed for refinement heuristic)
     uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
@@ -371,9 +390,7 @@ namespace storm {
             overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true);
             // We also need to find out which observation resolutions needs refinement.
             auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution);
-            ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end());
             // Potentially increase the observationThreshold so that at least one observation actually gets refinement.
-            heuristicParameters.observationThreshold = std::max(minRating, heuristicParameters.observationThreshold);
             refinedObservations = storm::utility::vector::filter<ValueType>(obsRatings, [&heuristicParameters](ValueType const& r) { return r <= heuristicParameters.observationThreshold;});
             STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
             for (auto const& obs : refinedObservations) {
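Since heuristicParameters is now const here, the threshold can no longer be bumped to the minimum rating in place; instead refineReachability grows it between steps, so the set of observations whose rating falls below the threshold widens monotonically. The growth rule as a standalone function:

    // obsThreshold' = obsThreshold + 0.1 * (1 - obsThreshold); converges to 1, so
    // eventually every observation rates below the threshold and gets refined.
    double nextObservationThreshold(double current, double incrementFactor = 0.1) {
        return current + incrementFactor * (1.0 - current);
    }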
@@ -528,11 +545,11 @@ namespace storm {
 }

 template<typename PomdpModelType, typename BeliefValueType>
-void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
+void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
     statistics.underApproximationBuildTime.start();
-    statistics.underApproximationStateLimit = maxStateCount;
-    if (!underApproximation->hasComputedValues()) {
+    statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold;
+    if (!refine) {
         // Build a new under approximation
         if (computeRewards) {
             underApproximation->startNewExploration(storm::utility::zero<ValueType>());
@@ -545,6 +562,7 @@ namespace storm {
     }

     // Expand the beliefs
+    uint64_t newlyExploredStates = 0;
     while (underApproximation->hasUnexploredState()) {
         uint64_t currId = underApproximation->exploreNextState();
@@ -554,18 +572,24 @@ namespace storm {
             underApproximation->addSelfloopTransition();
         } else {
             bool stopExploration = false;
-            if (!underApproximation->currentStateHasOldBehavior()) {
-                if (storm::utility::abs<ValueType>(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
+            bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated();
+            if (!stateAlreadyExplored) {
+                // Check whether we want to explore the state now!
+                if (storm::utility::abs<ValueType>(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < heuristicParameters.gapThreshold) {
                     stopExploration = true;
                     underApproximation->setCurrentStateIsTruncated();
-                } else if (underApproximation->getCurrentNumberOfMdpStates() >= maxStateCount) {
+                } else if (newlyExploredStates >= heuristicParameters.sizeThreshold) {
                     stopExploration = true;
                     underApproximation->setCurrentStateIsTruncated();
                 }
             }
+            if (!stopExploration) {
+                // We are going to explore one more state
+                ++newlyExploredStates;
+            }
             for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                 // Always restore old behavior if available
-                if (underApproximation->currentStateHasOldBehavior()) {
+                if (stateAlreadyExplored) {
                     underApproximation->restoreOldBehaviorAtCurrentState(action);
                 } else {
                     ValueType truncationProbability = storm::utility::zero<ValueType>();
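The unfolding now truncates a belief state for one of two reasons: the gap between its known bounds is already below gapThreshold, or the step's budget of newly explored states is spent. A self-contained sketch of that decision, with doubles standing in for ValueType:

    #include <cmath>
    #include <cstdint>

    bool truncateBelief(double lowerBound, double upperBound, double gapThreshold,
                        std::uint64_t newlyExploredStates, std::uint64_t sizeThreshold) {
        if (std::abs(upperBound - lowerBound) < gapThreshold) {
            return true; // remaining gap too small to be worth exploring
        }
        return newlyExploredStates >= sizeThreshold; // exploration budget exhausted
    }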

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (35 changes)

@@ -28,13 +28,30 @@ namespace storm {
 struct Options {
     Options();
-    uint64_t initialGridResolution; /// Decides how precise the bounds are
-    ValueType explorationThreshold; /// the threshold for exploration stopping. If the difference between over- and underapproximation for a state is smaller than the threshold, stop exploration of the state
-    bool doRefinement; /// Sets whether the bounds should be refined automatically until the refinement precision is reached
-    ValueType refinementPrecision; /// Used to decide when the refinement should terminate
-    ValueType numericPrecision; /// Used to decide whether two values are equal
-    bool cacheSubsimplices; /// Enables caching of subsimplices
-    boost::optional<uint64_t> beliefMdpSizeThreshold; /// Sets the (initial) size of the unfolded belief MDP. 0 means auto selection.
+    bool discretize;
+    bool unfold;
+    bool refine;
+    boost::optional<uint64_t> refineStepLimit;
+    boost::optional<ValueType> refinePrecision;
+    // Control parameters for the refinement heuristic:
+    // Discretization resolution
+    uint64_t resolutionInit;
+    ValueType resolutionFactor;
+    // The maximal number of newly expanded MDP states in a refinement step
+    uint64_t sizeThresholdInit;
+    uint64_t sizeThresholdFactor;
+    // Controls how large the gap between the known lower and upper bounds at a belief state needs to be in order to explore it
+    ValueType gapThresholdInit;
+    ValueType gapThresholdFactor;
+    // Controls whether "almost optimal" choices will be considered optimal
+    ValueType optimalChoiceValueThresholdInit;
+    ValueType optimalChoiceValueThresholdFactor;
+    // Controls which observations are refined
+    ValueType obsThresholdInit;
+    ValueType obsThresholdIncrementFactor;
+    ValueType numericPrecision; /// Used to decide whether two beliefs are equal
 };

 struct Result {
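A usage sketch of the reworked struct, mirroring what the CLI above sets plus the two new termination criteria (the concrete values are illustrative, not defaults):

    // Illustrative configuration for an iterative refinement run.
    storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>::Options options;
    options.discretize = true;      // build grid-based over-approximations
    options.unfold = true;          // unfold belief-MDP under-approximations
    options.refine = true;          // alternate refinement of both bounds
    options.refineStepLimit = 20;   // stop after at most 20 refinement steps ...
    options.refinePrecision = 1e-4; // ... or once the bounds are this close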
@@ -85,12 +102,12 @@ namespace storm {
     /**
      * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties
      */
-    void buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
+    void buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);

     /**
      * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties
      */
-    void buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
+    void buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);

     ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution);
