You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

207 lines
14 KiB

#pragma once
#include "storm-pomdp/analysis/FormulaInformation.h"
#include "storm/api/verification.h"
#include "storm/models/sparse/Pomdp.h"
#include "storm/models/sparse/StandardRewardModel.h"
#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
#include "storm/storage/Scheduler.h"
#include "storm/utility/macros.h"
#include "storm/exceptions/UnexpectedException.h"
#include "storm/exceptions/NotSupportedException.h"
namespace storm {
namespace pomdp {
namespace modelchecker {
/*!
 * Collection of value bounds for the states of a POMDP.
 *
 * Each entry of `lower` (respectively `upper`) is one bound vector that is indexed by state.
 * Several bound vectors may be stored; the accessors below combine them per state.
 */
template<typename ValueType>
struct TrivialPomdpValueBounds {
    /// Bound vectors (each indexed by state) that are combined by taking the maximum per state.
    std::vector<std::vector<ValueType>> lower;
    /// Bound vectors (each indexed by state) that are combined by taking the minimum per state.
    std::vector<std::vector<ValueType>> upper;

    /*!
     * Retrieves the largest value that any of the stored lower bound vectors assigns to the given state.
     *
     * @param state Index into each of the lower bound vectors.
     * @return The maximum over all stored lower bound vectors at position `state`.
     * @note At least one lower bound vector has to be present (only checked via assertion).
     */
    ValueType getHighestLowerBound(uint64_t const& state) const {
        STORM_LOG_ASSERT(!lower.empty(), "requested a lower bound but none were available");
        auto it = lower.begin();
        // Seed with the first vector, then tighten with the remaining ones.
        ValueType result = (*it)[state];
        for (++it; it != lower.end(); ++it) {
            result = std::max(result, (*it)[state]);
        }
        return result;
    }

    /*!
     * Retrieves the smallest value that any of the stored upper bound vectors assigns to the given state.
     *
     * @param state Index into each of the upper bound vectors.
     * @return The minimum over all stored upper bound vectors at position `state`.
     * @note At least one upper bound vector has to be present (only checked via assertion).
     */
    ValueType getSmallestUpperBound(uint64_t const& state) const {
        STORM_LOG_ASSERT(!upper.empty(), "requested an upper bound but none were available");
        auto it = upper.begin();
        // Seed with the first vector, then tighten with the remaining ones.
        ValueType result = (*it)[state];
        for (++it; it != upper.end(); ++it) {
            result = std::min(result, (*it)[state]);
        }
        return result;
    }
};
/*!
 * Computes cheap value bounds for a POMDP.
 *
 * One bound is obtained by model checking the formula on an MDP built from the POMDP's transition matrix,
 * state labeling and reward models. The opposite bounds are obtained by heuristically guessing schedulers
 * that pick the same choice distribution for all states with the same observation, and evaluating them.
 */
template<typename PomdpType>
class TrivialPomdpValueBoundsModelChecker {
public:
    using ValueType = typename PomdpType::ValueType;
    using ValueBounds = TrivialPomdpValueBounds<ValueType>;

    /*!
     * @param pomdp The POMDP to analyze. Only a reference is stored, so it has to outlive this object.
     */
    TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) {
        // Intentionally left empty
    }

    /*!
     * Convenience overload that derives the formula information from the POMDP and the formula.
     */
    ValueBounds getValueBounds(storm::logic::Formula const& formula) const {
        return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula));
    }

    /*!
     * Computes one value per choice of the POMDP by multiplying the transition matrix with the given state values.
     *
     * @param stateValues One value per state.
     * @param actionBasedRewards Forwarded as the third argument of multiplyWithVector; may be nullptr.
     *        NOTE(review): presumably added as a per-choice summand -- confirm against SparseMatrix::multiplyWithVector.
     * @return A vector with one entry per choice.
     */
    std::vector<ValueType> getChoiceValues(std::vector<ValueType> const& stateValues, std::vector<ValueType>* actionBasedRewards) const {
        std::vector<ValueType> choiceValues(pomdp.getNumberOfChoices());
        pomdp.getTransitionMatrix().multiplyWithVector(stateValues, choiceValues, actionBasedRewards);
        return choiceValues;
    }

    /*!
     * Guesses a randomized, observation-based scheduler from the given state values and evaluates it.
     *
     * Every choice of a state is scored by comparing its choice value with the state value. Choices whose score
     * reaches the threshold contribute (weighted by their score) to the distribution of the state's observation.
     * The resulting scheduler is applied to the given MDP and the formula is checked on the induced model.
     *
     * @param stateValues State values used to rate the choices (asserted to be non-negative).
     * @param actionBasedRewards Forwarded to getChoiceValues; may be nullptr.
     * @param formula The formula that is checked on the induced model.
     * @param info Information about the formula; its optimization direction determines the sign of the score.
     * @param underlyingMdp The MDP to which the guessed scheduler is applied.
     * @param scoreThreshold Minimal score a choice needs to be part of the distribution.
     * @param relativeScore If set, the value difference is divided by the average of state and choice value (if that is non-zero).
     * @return The value vector of the check result for the induced model.
     * @throws storm::exceptions::UnexpectedException if no explicit quantitative check result is obtained.
     */
    std::vector<ValueType> computeValuesForGuessedScheduler(std::vector<ValueType> const& stateValues, std::vector<ValueType>* actionBasedRewards,
                                                            storm::logic::Formula const& formula,
                                                            storm::pomdp::analysis::FormulaInformation const& info,
                                                            std::shared_ptr<storm::models::sparse::Mdp<ValueType>> underlyingMdp,
                                                            ValueType const& scoreThreshold, bool relativeScore) const {
        // Create some positional scheduler for the POMDP
        storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());

        // For each state, we heuristically find a good distribution over output actions.
        auto choiceValues = getChoiceValues(stateValues, actionBasedRewards);
        auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
        // One distribution per observation: states with the same observation contribute to the same distribution.
        std::vector<storm::storage::Distribution<ValueType, uint_fast64_t>> choiceDistributions(pomdp.getNrObservations());
        for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
            auto& choiceDistribution = choiceDistributions[pomdp.getObservation(state)];
            ValueType const& stateValue = stateValues[state];
            STORM_LOG_ASSERT(stateValue >= storm::utility::zero<ValueType>(), "Expected a non-negative state value.");
            for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) {
                ValueType const& choiceValue = choiceValues[choice];
                STORM_LOG_ASSERT(choiceValue >= storm::utility::zero<ValueType>(), "Expected a non-negative choice value.");
                // Rate this choice by considering the relative difference between the choice value and the (optimal) state value
                // A high score shall mean that the choice is "good"
                if (storm::utility::isInfinity(stateValue)) {
                    // For infinity states, we simply distribute uniformly.
                    // FIXME: This case could be handled a bit more sensible
                    choiceDistribution.addProbability(choice - choiceIndices[state], scoreThreshold);
                } else {
                    ValueType choiceScore = info.minimize() ? (choiceValue - stateValue) : (stateValue - choiceValue);
                    if (relativeScore) {
                        ValueType avg = (stateValue + choiceValue) / storm::utility::convertNumber<ValueType, uint64_t>(2);
                        if (!storm::utility::isZero(avg)) {
                            choiceScore /= avg;
                        }
                    }
                    // Turn the (scaled) difference into a score where larger means better.
                    choiceScore = storm::utility::one<ValueType>() - choiceScore;
                    if (choiceScore >= scoreThreshold) {
                        choiceDistribution.addProbability(choice - choiceIndices[state], choiceScore);
                    }
                }
            }
            STORM_LOG_ASSERT(choiceDistribution.size() > 0, "Empty choice distribution.");
        }

        // Normalize all distributions
        for (auto& choiceDistribution : choiceDistributions) {
            choiceDistribution.normalize();
        }

        // Set the scheduler for all states
        for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
            pomdpScheduler.setChoice(choiceDistributions[pomdp.getObservation(state)], state);
        }
        STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler.");

        // Evaluate the guessed scheduler on the model it induces.
        auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false);
        auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
        STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained.");
        STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
        std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
        return pomdpSchedulerResult;
    }

    /*!
     * Computes lower and upper value bounds for the given formula.
     *
     * When minimizing, the values of the fully observable MDP are used as lower bound and the guessed scheduler
     * values as upper bounds; when maximizing it is the other way around.
     *
     * @param formula The formula to compute bounds for.
     * @param info Information about the formula (property type, optimization direction, reward model name).
     * @return The computed bounds.
     * @throws storm::exceptions::NotSupportedException if the formula is neither a non-nested reachability probability
     *         nor a non-nested expected reward formula.
     * @throws storm::exceptions::UnexpectedException if no explicit quantitative check result is obtained.
     */
    ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) const {
        STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis.");

        // Compute the values on the fully observable MDP
        // We need an actual MDP so that we can apply schedulers below.
        // Also, the api call in the next line will require a copy anyway.
        auto underlyingMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels());
        auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(underlyingMdp, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
        STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained.");
        STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
        std::vector<ValueType> fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());

        // Rewards are only relevant for expected reward formulas; otherwise a nullptr is passed on.
        std::vector<ValueType> actionBasedRewards;
        std::vector<ValueType>* actionBasedRewardsPtr = nullptr;
        if (info.isNonNestedExpectedRewardFormula()) {
            actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix());
            actionBasedRewardsPtr = &actionBasedRewards;
        }

        // Initial guesses: each pair is (score threshold, use relative score).
        std::vector<std::pair<double, bool>> guessParameters({{0.875, false}, {0.875, true}, {0.75, false}, {0.75, true}});
        // Number of additional iterations that are performed on the best initial guess.
        uint64_t const numberOfRefinements = 3;

        std::vector<std::vector<ValueType>> guessedSchedulerValues;
        guessedSchedulerValues.reserve(guessParameters.size() + numberOfRefinements);
        for (auto const& pars : guessParameters) {
            guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(fullyObservableResult, actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(pars.first), pars.second));
        }

        // compute the 'best' guess (w.r.t. the sum over all state values) and do a few iterations on it
        uint64_t bestGuess = 0;
        ValueType bestGuessSum = std::accumulate(guessedSchedulerValues.front().begin(), guessedSchedulerValues.front().end(), storm::utility::zero<ValueType>());
        for (uint64_t guess = 1; guess < guessedSchedulerValues.size(); ++guess) {
            ValueType guessSum = std::accumulate(guessedSchedulerValues[guess].begin(), guessedSchedulerValues[guess].end(), storm::utility::zero<ValueType>());
            if ((info.minimize() && guessSum < bestGuessSum) || (info.maximize() && guessSum > bestGuessSum)) {
                bestGuess = guess;
                bestGuessSum = guessSum;
            }
        }

        // The first refinement starts from the best guess, each further one from the previous refinement result.
        ValueType const bestGuessThreshold = storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first);
        bool const bestGuessRelative = guessParameters[bestGuess].second;
        uint64_t refinementInput = bestGuess;
        for (uint64_t refinement = 0; refinement < numberOfRefinements; ++refinement) {
            // Compute into a local first so that the input reference is no longer needed when the vector grows.
            auto refinedValues = computeValuesForGuessedScheduler(guessedSchedulerValues[refinementInput], actionBasedRewardsPtr, formula, info, underlyingMdp, bestGuessThreshold, bestGuessRelative);
            guessedSchedulerValues.push_back(std::move(refinedValues));
            refinementInput = guessedSchedulerValues.size() - 1;
        }

        // Check if one of the guesses is worse than one of the others (and potentially delete it)
        // Avoid deleting entries during the loop to ensure that indices remain valid
        storm::storage::BitVector keptGuesses(guessedSchedulerValues.size(), true);
        for (uint64_t i = 0; i < guessedSchedulerValues.size() - 1; ++i) {
            if (!keptGuesses.get(i)) {
                continue;
            }
            for (uint64_t j = i + 1; j < guessedSchedulerValues.size(); ++j) {
                if (!keptGuesses.get(j)) {
                    continue;
                }
                if (storm::utility::vector::compareElementWise(guessedSchedulerValues[i], guessedSchedulerValues[j], std::less_equal<ValueType>())) {
                    if (info.minimize()) {
                        // In this case we are guessing upper bounds (and smaller upper bounds are better)
                        keptGuesses.set(j, false);
                    } else {
                        // In this case we are guessing lower bounds (and larger lower bounds are better)
                        keptGuesses.set(i, false);
                        // Guess i is gone, so there is no point in comparing it with further guesses.
                        break;
                    }
                } else if (storm::utility::vector::compareElementWise(guessedSchedulerValues[j], guessedSchedulerValues[i], std::less_equal<ValueType>())) {
                    if (info.minimize()) {
                        keptGuesses.set(i, false);
                        // Guess i is gone, so there is no point in comparing it with further guesses.
                        break;
                    } else {
                        keptGuesses.set(j, false);
                    }
                }
            }
        }
        STORM_LOG_INFO("Keeping scheduler guesses " << keptGuesses);
        storm::utility::vector::filterVectorInPlace(guessedSchedulerValues, keptGuesses);

        // Finally prepare the result
        ValueBounds result;
        if (info.minimize()) {
            result.lower.push_back(std::move(fullyObservableResult));
            result.upper = std::move(guessedSchedulerValues);
        } else {
            result.lower = std::move(guessedSchedulerValues);
            result.upper.push_back(std::move(fullyObservableResult));
        }
        STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower.front(), result.upper.front(), std::less_equal<ValueType>()), "Lower bound is larger than upper bound");
        return result;
    }

private:
    /// The analyzed POMDP (referenced, not owned).
    PomdpType const& pomdp;
};
}
}
}