From 37da2b4e1fae34993c05c6153735fc433ad65dfa Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 31 Mar 2020 13:58:45 +0200 Subject: [PATCH] Added a new model checker that allows to compute trivial (but sound) bounds on the value of POMDP states --- .../TrivialPomdpValueBoundsModelChecker.h | 115 ++++++++++++++++++ src/storm/storage/Distribution.cpp | 12 +- src/storm/storage/Distribution.h | 5 + 3 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h diff --git a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h new file mode 100644 index 000000000..862a82a05 --- /dev/null +++ b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h @@ -0,0 +1,115 @@ +#pragma once + +#include "storm-pomdp/analysis/FormulaInformation.h" + +#include "storm/api/verification.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/models/sparse/StandardRewardModel.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/storage/Scheduler.h" + +#include "storm/utility/macros.h" +#include "storm/exceptions/UnexpectedException.h" +#include "storm/exceptions/NotSupportedException.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + template + class TrivialPomdpValueBoundsModelChecker { + public: + typedef typename PomdpType::ValueType ValueType; + TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) { + // Intentionally left empty + } + + struct ValueBounds { + std::vector lower; + std::vector upper; + }; + ValueBounds getValueBounds(storm::logic::Formula const& formula) { + return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula)); + } + + ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) { + STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis."); + // Compute the values on the fully observable MDP + // We need an actual MDP here so that the apply scheduler method below will work. + // Also, the api call in the next line will require a copy anyway. + auto underlyingMdp = std::make_shared>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels()); + auto resultPtr = storm::api::verifyWithSparseEngine(underlyingMdp, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + std::vector fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult().getValueVector()); + + // Create some positional scheduler for the POMDP + storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); + // For each state, we heuristically find a good distribution over output actions. + std::vector fullyObservableChoiceValues(pomdp.getNumberOfChoices()); + if (info.isNonNestedExpectedRewardFormula()) { + std::vector actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix()); + pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues, &actionBasedRewards); + } else { + pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues); + } + auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); + for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { + auto obsStates = pomdp.getStatesWithObservation(obs); + storm::storage::Distribution choiceDistribution; + for (auto const &state : obsStates) { + ValueType const& stateValue = fullyObservableResult[state]; + assert(stateValue >= storm::utility::zero()); + for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) { + ValueType const& choiceValue = fullyObservableChoiceValues[choice]; + assert(choiceValue >= storm::utility::zero()); + // Rate this choice by considering the relative difference between the choice value and the (optimal) state value + ValueType choiceRating; + if (stateValue < choiceValue) { + choiceRating = choiceValue - stateValue; + if (!storm::utility::isZero(choiceValue)) { + choiceRating /= choiceValue; + } + } else { + choiceRating = stateValue - choiceValue; + if (!storm::utility::isZero(stateValue)) { + choiceRating /= stateValue; + } + } + assert(choiceRating <= storm::utility::one()); + assert(choiceRating >= storm::utility::zero()); + // choiceRating = 0 is a very good choice, choiceRating = 1 is a very bad choice + if (choiceRating <= storm::utility::convertNumber(0.5)) { + choiceDistribution.addProbability(choice - choiceIndices[state], storm::utility::one() - choiceRating); + } + } + } + choiceDistribution.normalize(); + for (auto const& state : obsStates) { + pomdpScheduler.setChoice(choiceDistribution, state); + } + } + auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false); + + auto resultPtr2 = storm::api::verifyWithSparseEngine(scheduledModel, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr2, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr2->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + std::vector pomdpSchedulerResult = std::move(resultPtr2->template asExplicitQuantitativeCheckResult().getValueVector()); + + // Finally prepare the result + ValueBounds result; + if (info.minimize()) { + result.lower = std::move(fullyObservableResult); + result.upper = std::move(pomdpSchedulerResult); + } else { + result.lower = std::move(pomdpSchedulerResult); + result.upper = std::move(fullyObservableResult); + } + return result; + } + + private: + PomdpType const& pomdp; + }; + } + } +} \ No newline at end of file diff --git a/src/storm/storage/Distribution.cpp b/src/storm/storage/Distribution.cpp index f40afb402..2290c611c 100644 --- a/src/storm/storage/Distribution.cpp +++ b/src/storm/storage/Distribution.cpp @@ -166,7 +166,17 @@ namespace storm { } } - + template + void Distribution::normalize() { + ValueType sum = storm::utility::zero(); + for (auto const& entry: distribution) { + sum += entry.second; + } + for (auto& entry: distribution) { + entry.second /= sum; + } + } + template class Distribution; template std::ostream& operator<<(std::ostream& out, Distribution const& distribution); diff --git a/src/storm/storage/Distribution.h b/src/storm/storage/Distribution.h index d7e0bd2fb..c3ac58dcc 100644 --- a/src/storm/storage/Distribution.h +++ b/src/storm/storage/Distribution.h @@ -144,6 +144,11 @@ namespace storm { */ ValueType getProbability(StateType const& state) const; + /*! + * Normalizes the distribution such that the values sum up to one. + */ + void normalize(); + private: // A list of states and the probabilities that are assigned to them. container_type distribution;