Browse Source
Added a new model checker that allows to compute trivial (but sound) bounds on the value of POMDP states
tempestpy_adaptions
Added a new model checker that allows to compute trivial (but sound) bounds on the value of POMDP states
tempestpy_adaptions
Tim Quatmann
5 years ago
3 changed files with 131 additions and 1 deletions
-
115src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h
-
12src/storm/storage/Distribution.cpp
-
5src/storm/storage/Distribution.h
@ -0,0 +1,115 @@ |
|||||
|
#pragma once |
||||
|
|
||||
|
#include "storm-pomdp/analysis/FormulaInformation.h" |
||||
|
|
||||
|
#include "storm/api/verification.h" |
||||
|
#include "storm/models/sparse/Pomdp.h" |
||||
|
#include "storm/models/sparse/StandardRewardModel.h" |
||||
|
#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" |
||||
|
#include "storm/storage/Scheduler.h" |
||||
|
|
||||
|
#include "storm/utility/macros.h" |
||||
|
#include "storm/exceptions/UnexpectedException.h" |
||||
|
#include "storm/exceptions/NotSupportedException.h" |
||||
|
|
||||
|
namespace storm { |
||||
|
namespace pomdp { |
||||
|
namespace modelchecker { |
||||
|
template <typename PomdpType> |
||||
|
class TrivialPomdpValueBoundsModelChecker { |
||||
|
public: |
||||
|
typedef typename PomdpType::ValueType ValueType; |
||||
|
TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) { |
||||
|
// Intentionally left empty |
||||
|
} |
||||
|
|
||||
|
struct ValueBounds { |
||||
|
std::vector<ValueType> lower; |
||||
|
std::vector<ValueType> upper; |
||||
|
}; |
||||
|
ValueBounds getValueBounds(storm::logic::Formula const& formula) { |
||||
|
return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula)); |
||||
|
} |
||||
|
|
||||
|
ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) { |
||||
|
STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis."); |
||||
|
// Compute the values on the fully observable MDP |
||||
|
// We need an actual MDP here so that the apply scheduler method below will work. |
||||
|
// Also, the api call in the next line will require a copy anyway. |
||||
|
auto underlyingMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels()); |
||||
|
auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(underlyingMdp, storm::api::createTask<ValueType>(formula.asSharedPointer(), false)); |
||||
|
STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained."); |
||||
|
STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); |
||||
|
std::vector<ValueType> fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector()); |
||||
|
|
||||
|
// Create some positional scheduler for the POMDP |
||||
|
storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates()); |
||||
|
// For each state, we heuristically find a good distribution over output actions. |
||||
|
std::vector<ValueType> fullyObservableChoiceValues(pomdp.getNumberOfChoices()); |
||||
|
if (info.isNonNestedExpectedRewardFormula()) { |
||||
|
std::vector<ValueType> actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix()); |
||||
|
pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues, &actionBasedRewards); |
||||
|
} else { |
||||
|
pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues); |
||||
|
} |
||||
|
auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); |
||||
|
for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { |
||||
|
auto obsStates = pomdp.getStatesWithObservation(obs); |
||||
|
storm::storage::Distribution<ValueType, uint_fast64_t> choiceDistribution; |
||||
|
for (auto const &state : obsStates) { |
||||
|
ValueType const& stateValue = fullyObservableResult[state]; |
||||
|
assert(stateValue >= storm::utility::zero<ValueType>()); |
||||
|
for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) { |
||||
|
ValueType const& choiceValue = fullyObservableChoiceValues[choice]; |
||||
|
assert(choiceValue >= storm::utility::zero<ValueType>()); |
||||
|
// Rate this choice by considering the relative difference between the choice value and the (optimal) state value |
||||
|
ValueType choiceRating; |
||||
|
if (stateValue < choiceValue) { |
||||
|
choiceRating = choiceValue - stateValue; |
||||
|
if (!storm::utility::isZero(choiceValue)) { |
||||
|
choiceRating /= choiceValue; |
||||
|
} |
||||
|
} else { |
||||
|
choiceRating = stateValue - choiceValue; |
||||
|
if (!storm::utility::isZero(stateValue)) { |
||||
|
choiceRating /= stateValue; |
||||
|
} |
||||
|
} |
||||
|
assert(choiceRating <= storm::utility::one<ValueType>()); |
||||
|
assert(choiceRating >= storm::utility::zero<ValueType>()); |
||||
|
// choiceRating = 0 is a very good choice, choiceRating = 1 is a very bad choice |
||||
|
if (choiceRating <= storm::utility::convertNumber<ValueType>(0.5)) { |
||||
|
choiceDistribution.addProbability(choice - choiceIndices[state], storm::utility::one<ValueType>() - choiceRating); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
choiceDistribution.normalize(); |
||||
|
for (auto const& state : obsStates) { |
||||
|
pomdpScheduler.setChoice(choiceDistribution, state); |
||||
|
} |
||||
|
} |
||||
|
auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false); |
||||
|
|
||||
|
auto resultPtr2 = storm::api::verifyWithSparseEngine<ValueType>(scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false)); |
||||
|
STORM_LOG_THROW(resultPtr2, storm::exceptions::UnexpectedException, "No check result obtained."); |
||||
|
STORM_LOG_THROW(resultPtr2->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); |
||||
|
std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr2->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector()); |
||||
|
|
||||
|
// Finally prepare the result |
||||
|
ValueBounds result; |
||||
|
if (info.minimize()) { |
||||
|
result.lower = std::move(fullyObservableResult); |
||||
|
result.upper = std::move(pomdpSchedulerResult); |
||||
|
} else { |
||||
|
result.lower = std::move(pomdpSchedulerResult); |
||||
|
result.upper = std::move(fullyObservableResult); |
||||
|
} |
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
PomdpType const& pomdp; |
||||
|
}; |
||||
|
} |
||||
|
} |
||||
|
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue