
Changed method signatures to new data structures.

main · Tim Quatmann · 5 years ago
parent commit 71e0654498

Changed files:
1. src/storm-pomdp/builder/BeliefMdpExplorer.h (10 changed lines)
2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (765 changed lines)
3. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (107 changed lines)
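The commit renames the checker's template parameters from <ValueType, RewardModelType> to <PomdpModelType, BeliefValueType> and replaces the POMDPCheckResult return type of check() with a Result struct carrying explicit lower and upper bounds. A minimal sketch of how a call site changes (editor's illustration, not code from the commit; the pomdp and formula objects and the required storm headers are assumed):

    // Before: parameterized by value type (and reward model type), returning a
    // std::unique_ptr<POMDPCheckResult<double>> with over-/underApproxValue fields.
    //   storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double> oldChecker(pomdp);
    //   auto oldResult = oldChecker.check(formula);

    // After: parameterized by the POMDP model type, returning a Result by value.
    storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>> checker(pomdp);
    auto result = checker.check(formula);
    std::cout << "[" << result.lowerBound << ", " << result.upperBound << "], relative gap: " << result.diff(true) << std::endl;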

src/storm-pomdp/builder/BeliefMdpExplorer.h (10 changed lines)

@@ -6,6 +6,7 @@
#include <map>
#include <boost/optional.hpp>
#include "storm-parsers/api/properties.h"
#include "storm/api/properties.h"
#include "storm/api/verification.h"
@@ -13,20 +14,25 @@
#include "storm/utility/macros.h"
#include "storm-pomdp/storage/BeliefManager.h"
#include "storm/utility/SignalHandler.h"
#include "storm/modelchecker/results/CheckResult.h"
#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp"
namespace storm {
namespace builder {
template<typename PomdpType>
template<typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType>
class BeliefMdpExplorer {
public:
typedef typename PomdpType::ValueType ValueType;
typedef storm::storage::BeliefManager<PomdpType> BeliefManagerType;
typedef storm::storage::BeliefManager<PomdpType, BeliefValueType> BeliefManagerType;
typedef typename BeliefManagerType::BeliefId BeliefId;
typedef uint64_t MdpStateType;
BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, std::vector<ValueType> const& pomdpLowerValueBounds, std::vector<ValueType> const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) {
// Intentionally left empty
}
BeliefMdpExplorer(BeliefMdpExplorer&& other) = default;
void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) {
// Reset data from potential previous explorations
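BeliefMdpExplorer now takes the belief value type as a second template parameter (defaulting to the POMDP's value type) and gains a defaulted move constructor, which lets the model checker move a finished explorer into a shared_ptr. A rough usage sketch under these assumptions (pomdp, precision, and the two bound vectors are placeholders, not names from the commit):

    using PomdpType = storm::models::sparse::Pomdp<double>;
    using ExplorerType = storm::builder::BeliefMdpExplorer<PomdpType>;  // BeliefValueType defaults to double here

    auto beliefManager = std::make_shared<ExplorerType::BeliefManagerType>(pomdp, precision);
    ExplorerType explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
    explorer.startNewExploration();
    // The new move constructor makes this cheap (as done in ApproximatePOMDPModelchecker.cpp):
    auto explorerPtr = std::make_shared<ExplorerType>(std::move(explorer));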

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (765 changed lines)

@@ -15,13 +15,8 @@
#include "storm/models/sparse/StandardRewardModel.h"
#include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h"
#include "storm/utility/vector.h"
#include "storm/modelchecker/results/CheckResult.h"
#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp"
#include "storm/api/properties.h"
#include "storm/api/export.h"
#include "storm-parsers/api/storm-parsers.h"
#include "storm-pomdp/builder/BeliefMdpExplorer.h"
#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"
@@ -32,8 +27,8 @@
namespace storm {
namespace pomdp {
namespace modelchecker {
template<typename ValueType, typename RewardModelType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::Options::Options() {
template<typename PomdpModelType, typename BeliefValueType>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Options::Options() {
initialGridResolution = 10;
explorationThreshold = storm::utility::zero<ValueType>();
doRefinement = true;
@@ -41,61 +36,78 @@ namespace storm {
numericPrecision = storm::NumberTraits<ValueType>::IsExact ? storm::utility::zero<ValueType>() : storm::utility::convertNumber<ValueType>(1e-9);
cacheSubsimplices = false;
}
template<typename ValueType, typename RewardModelType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) {
template<typename PomdpModelType, typename BeliefValueType>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) {
// Intentionally left empty
}
template<typename PomdpModelType, typename BeliefValueType>
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::diff(bool relative) const {
ValueType diff = upperBound - lowerBound;
if (diff < storm::utility::zero<ValueType>()) {
STORM_LOG_WARN_COND(diff >= 1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << ".");
diff = storm::utility::zero<ValueType >();
}
if (relative && !storm::utility::isZero(upperBound)) {
diff /= upperBound;
}
return diff;
}
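The new Result struct makes both bounds explicit; diff() clamps a slightly negative gap (which can occur numerically) to zero and can report the gap relative to the upper bound. A small illustration with made-up values (editor's sketch, not part of the commit):

    Result result(0.4, 0.5);                    // lowerBound = 0.4, upperBound = 0.5
    ValueType absoluteGap = result.diff();      // 0.1
    ValueType relativeGap = result.diff(true);  // 0.1 / 0.5 = 0.2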
template<typename PomdpModelType, typename BeliefValueType>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) {
// intentionally left empty;
}
template<typename ValueType, typename RewardModelType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp, Options options) : pomdp(pomdp), options(options) {
template<typename PomdpModelType, typename BeliefValueType>
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) {
cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(this->options.numericPrecision), false);
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::check(storm::logic::Formula const& formula) {
template<typename PomdpModelType, typename BeliefValueType>
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::check(storm::logic::Formula const& formula) {
// Reset all collected statistics
statistics = Statistics();
std::unique_ptr<POMDPCheckResult<ValueType>> result;
// Extract the relevant information from the formula
auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula);
// Compute some initial bounds on the values for each state of the pomdp
auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo);
Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]);
if (formulaInfo.isNonNestedReachabilityProbability()) {
// FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
if (!formulaInfo.getSinkStates().empty()) {
auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported");
}
if (options.doRefinement) {
result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
} else {
result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
}
} else if (formulaInfo.isNonNestedExpectedRewardFormula()) {
boost::optional<std::string> rewardModelName;
if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) {
// FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
if (options.doRefinement) {
result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true);
if (formulaInfo.isNonNestedReachabilityProbability()) {
if (!formulaInfo.getSinkStates().empty()) {
auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported");
}
} else {
// FIXME: pick the non-unique reward model here
STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet.");
result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper);
// Expected reward formula!
rewardModelName = formulaInfo.getRewardModelName();
}
} else {
STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
}
if (options.doRefinement) {
refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
} else {
computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result);
}
if (storm::utility::resources::isTerminate()) {
statistics.aborted = true;
}
return result;
}
template<typename ValueType, typename RewardModelType>
void ApproximatePOMDPModelchecker<ValueType, RewardModelType>::printStatisticsToStream(std::ostream& stream) const {
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::printStatisticsToStream(std::ostream& stream) const {
stream << "##### Grid Approximation Statistics ######" << std::endl;
stream << "# Input model: " << std::endl;
pomdp.printModelInformationToStream(stream);
@@ -143,114 +155,82 @@ namespace storm {
stream << "##########################################" << std::endl;
}
std::shared_ptr<storm::logic::Formula const> createStandardProperty(bool min, bool computeRewards) {
std::string propertyString = computeRewards ? "R" : "P";
propertyString += min ? "min" : "max";
propertyString += "=? [F \"target\"]";
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
return storm::api::extractFormulasFromProperties(propertyVector).front();
}
template<typename ValueType>
storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property, std::vector<ValueType>&& hintVector) {
//Note: The property should not run out of scope after calling this because the task only stores the property by reference.
// Therefore, this method needs the property by reference (and not const reference)
auto task = storm::api::createTask<ValueType>(property, false);
if (!hintVector.empty()) {
auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
hint.setResultHint(std::move(hintVector));
auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
task.setHint(hintPtr);
}
return task;
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards) {
std::srand(time(NULL));
// Compute easy upper and lower bounds
storm::utility::Stopwatch underlyingWatch(true);
// Compute the results on the underlying MDP as a basic overapproximation
storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling());
// TODO: Is the following really necessary
underlyingMdpLabeling.addLabel("__goal__");
std::vector<uint64_t> goalStates;
for (auto const &targetObs : targetObservations) {
for (auto const &goalState : pomdp.getStatesWithObservation(targetObs)) {
underlyingMdpLabeling.addLabelToState("__goal__", goalState);
}
}
storm::models::sparse::Mdp<ValueType, RewardModelType> underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels());
auto underlyingModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(
std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underlyingMdp));
std::string initPropString = computeRewards ? "R" : "P";
initPropString += min ? "min" : "max";
initPropString += "=? [F \"__goal__\"]";
std::vector<storm::jani::Property> propVector = storm::api::parseProperties(initPropString);
std::shared_ptr<storm::logic::Formula const> underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front();
STORM_PRINT("Underlying MDP" << std::endl)
if (computeRewards) {
underlyingMdp.addRewardModel("std", pomdp.getUniqueRewardModel());
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
}
underlyingMdp.printModelInformationToStream(std::cout);
std::unique_ptr<storm::modelchecker::CheckResult> underlyingRes(
storm::api::verifyWithSparseEngine<ValueType>(underlyingModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
STORM_LOG_ASSERT(underlyingRes, "Result not exist.");
underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true)));
auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
underlyingWatch.stop();
storm::utility::Stopwatch positionalWatch(true);
// we define some positional scheduler for the POMDP as a basic lower bound
storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) {
auto obsStates = pomdp.getStatesWithObservation(obs);
// select a random action for all states with the same observation
uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front());
for (auto const &state : obsStates) {
pomdpScheduler.setChoice(chosenAction, state);
uint64_t underApproxSizeThreshold = 0;
{ // Overapproximation
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
if (rewardModelName) {
manager->setRewardModel(rewardModelName);
}
auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager);
if (approx) {
STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n");
approx->getExploredMdp()->printModelInformationToStream(std::cout);
ValueType& resultValue = min ? result.lowerBound : result.upperBound;
resultValue = approx->getComputedValueAtInitialState();
underApproxSizeThreshold = approx->getExploredMdp()->getNumberOfStates();
}
}
{ // Underapproximation (Uses a fresh Belief manager)
auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
if (rewardModelName) {
manager->setRewardModel(rewardModelName);
}
auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager);
if (approx) {
STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n");
approx->getExploredMdp()->printModelInformationToStream(std::cout);
ValueType& resultValue = min ? result.upperBound : result.lowerBound;
resultValue = approx->getComputedValueAtInitialState();
}
}
auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false);
if (computeRewards) {
underApproxModel->restrictRewardModels({"std"});
}
STORM_PRINT("Random Positional Scheduler" << std::endl)
underApproxModel->printModelInformationToStream(std::cout);
std::unique_ptr<storm::modelchecker::CheckResult> underapproxRes(
storm::api::verifyWithSparseEngine<ValueType>(underApproxModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
STORM_LOG_ASSERT(underapproxRes, "Result not exist.");
underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true)));
auto initialUnderApproxMap = underapproxRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
positionalWatch.stop();
STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // "
<< initialUnderApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl)
STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl)
// Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution.
// This can probably be improved (i.e. resolutions for single belief states)
STORM_PRINT("Initial Resolution: " << options.initialGridResolution << std::endl)
}
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result) {
// Set up exploration data
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
std::set<uint32_t> changedObservations;
uint64_t underApproxModelSize = 200;
uint64_t refinementCounter = 1;
STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl)
std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
{},
{}, underApproxModelSize);
if (res == nullptr) {
statistics.refinementSteps = 0;
return nullptr;
auto beliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision);
if (rewardModelName) {
beliefManager->setRewardModel(rewardModelName);
}
// OverApproximaion
auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager);
if (!overApproximation) {
return;
}
ValueType lastMinScore = storm::utility::infinity<ValueType>();
while (refinementCounter < 1000 && ((!min && res->overApproxValue - res->underApproxValue > options.refinementPrecision) ||
(min && res->underApproxValue - res->overApproxValue > options.refinementPrecision))) {
ValueType& overApproxValue = min ? result.lowerBound : result.upperBound;
overApproxValue = overApproximation->getComputedValueAtInitialState();
// UnderApproximation TODO: use same belief manager?)
uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates();
auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager);
if (!underApproximation) {
return;
}
ValueType& underApproxValue = min ? result.upperBound : result.lowerBound;
underApproxValue = underApproximation->getComputedValueAtInitialState();
// ValueType lastMinScore = storm::utility::infinity<ValueType>();
// Start refinement
statistics.refinementSteps = 0;
while (result.diff() > options.refinementPrecision) {
if (storm::utility::resources::isTerminate()) {
break;
}
// TODO the actual refinement
/*
// choose which observation(s) to refine
std::vector<ValueType> obsAccumulator(pomdp.getNrObservations(), storm::utility::zero<ValueType>());
std::vector<uint64_t> beliefCount(pomdp.getNrObservations(), 0);
@@ -286,9 +266,9 @@ namespace storm {
}
/*for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
obsAccumulator[i] /= storm::utility::convertNumber<ValueType>(beliefCount[i]);
}*/
//for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
// obsAccumulator[i] /= storm::utility::convertNumber<ValueType>(beliefCount[i]);
//}
changedObservations.clear();
//TODO think about some other scoring methods
@@ -301,21 +281,21 @@ namespace storm {
observationResolutionVector[i] = maxRes + 1;
changedObservations.insert(i);
}
/*} else {
lastMinScore = std::min(maxAvgDifference, lastMinScore);
STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
STORM_PRINT("Last Min Score: " << lastMinScore << std::endl)
//STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl)
for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
//} else {
// lastMinScore = std::min(maxAvgDifference, lastMinScore);
// STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
// STORM_PRINT("Last Min Score: " << lastMinScore << std::endl)
// //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl)
// for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
//STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i])
if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) {
// if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) {
//STORM_PRINT(" *** ")
observationResolutionVector[i] += 1;
changedObservations.insert(i);
}
// observationResolutionVector[i] += 1;
// changedObservations.insert(i);
// }
//STORM_PRINT(std::endl)
}
}*/
// }
//}
if (underApproxModelSize < std::numeric_limits<uint64_t>::max() - 101) {
underApproxModelSize += 100;
}
@@ -327,60 +307,13 @@ namespace storm {
STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) ||
cc.isEqual(res->underApproxValue, res->overApproxValue),
"The value for the under-approximation is larger than the value for the over-approximation.");
++refinementCounter;
}
statistics.refinementSteps = refinementCounter;
if (min) {
return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{res->underApproxValue, res->overApproxValue});
} else {
return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{res->overApproxValue, res->underApproxValue});
}
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min,
bool computeRewards,
std::vector<ValueType> const& lowerPomdpValueBounds,
std::vector<ValueType> const& upperPomdpValueBounds,
uint64_t maxUaModelSize) {
STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, lowerPomdpValueBounds,
upperPomdpValueBounds, maxUaModelSize);
if (result == nullptr) {
return nullptr;
}
if (min) {
return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->underApproxValue, result->overApproxValue});
} else {
return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->overApproxValue, result->underApproxValue});
*/
++statistics.refinementSteps.get();
}
}
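Most of the per-iteration refinement logic in the new refineReachability is still commented out above; only the step counter is maintained. Condensed to its current control flow (a paraphrase for orientation, not code from the commit; argument names are shortened):

    // 1. Build a belief manager and an initial over-approximation.
    auto overApproximation = computeOverApproximation(targetObservations, min, computeRewards, lower, upper, observationResolutionVector, beliefManager);
    // 2. Use its MDP size as the state budget for the under-approximation.
    auto underApproximation = computeUnderApproximation(targetObservations, min, computeRewards, lower, upper, overApproximation->getExploredMdp()->getNumberOfStates(), beliefManager);
    // 3. Loop until the gap is small enough; the actual grid refinement is still a TODO in this commit.
    while (result.diff() > options.refinementPrecision && !storm::utility::resources::isTerminate()) {
        ++statistics.refinementSteps.get();
    }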
template <typename ValueType, typename BeliefType, typename SummandsType>
ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
ValueType result = storm::utility::zero<ValueType>();
for (auto const& entry : belief) {
result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
}
return result;
}
template<typename ValueType, typename RewardModelType>
std::shared_ptr<RefinementComponents<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
std::vector<uint64_t> &observationResolutionVector,
bool computeRewards,
std::vector<ValueType> const& lowerPomdpValueBounds,
std::vector<ValueType> const& upperPomdpValueBounds,
uint64_t maxUaModelSize) {
auto beliefManager = std::make_shared<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>>(pomdp, options.numericPrecision);
if (computeRewards) {
beliefManager->setRewardModel(); // TODO: get actual name
}
template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager) {
statistics.overApproximationBuildTime.start();
storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
if (computeRewards) {
@@ -390,9 +323,6 @@
}
// Expand the beliefs to generate the grid on-the-fly
if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
}
while (explorer.hasUnexploredState()) {
uint64_t currId = explorer.exploreNextState();
@@ -445,39 +375,20 @@
explorer.finishExploration();
statistics.overApproximationBuildTime.stop();
STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
explorer.getExploredMdp()->printModelInformationToStream(std::cout);
statistics.overApproximationCheckTime.start();
explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
statistics.overApproximationCheckTime.stop();
STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl);
STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
//auto underApprox = weightedSumUnderMap[initialBelief.id];
auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize, lowerPomdpValueBounds, upperPomdpValueBounds);
if (storm::utility::resources::isTerminate() && !underApproxComponents) {
// TODO: return other components needed for refinement.
//return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
//return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()});
}
STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
/* TODO: return other components needed for refinement.
return std::make_unique<RefinementComponents<ValueType>>(
RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
*/
return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {},
underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()});
return std::make_shared<ExplorerType>(std::move(explorer));
}
template<typename ValueType, typename RewardModelType>
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
/*TODO:
template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<RefinementComponents<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
std::vector<uint64_t> &observationResolutionVector,
bool computeRewards,
std::shared_ptr<RefinementComponents<ValueType>> refinementComponents,
@@ -504,7 +415,7 @@
uint64_t nextBeliefId = refinementComponents->beliefList.size();
uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates();
std::set<uint64_t> relevantStates;
std::set<uint64_t> relevantStates; // The MDP states where the observation has changed
for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) {
auto currentBelief = refinementComponents->beliefList[iter.first];
if (changedObservations.find(currentBelief.observation) != changedObservations.end()) {
@@ -512,7 +423,7 @@
}
}
std::set<std::pair<uint64_t, uint64_t>> statesAndActionsToCheck;
std::set<std::pair<uint64_t, uint64_t>> statesAndActionsToCheck; // The predecessors of states where the observation has changed
for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) {
for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) {
for (typename storm::storage::SparseMatrix<ValueType>::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(
@@ -536,6 +447,7 @@
action);
std::map<uint64_t, ValueType> transitionInActionBelief;
for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
// Expand and triangulate the successor
uint32_t observation = iter->first;
uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget,
targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId);
@@ -803,160 +715,12 @@
refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap,
underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId});
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min) {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
// return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true);
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min) {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
// return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
*/
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<bool> &beliefIsTarget,
std::set<uint32_t> const &targetObservations,
uint64_t initialBeliefId, bool min,
bool computeRewards, uint64_t maxModelSize) {
std::set<uint64_t> visitedBelieves;
std::deque<uint64_t> beliefsToBeExpanded;
bsmap_type beliefStateMap;
std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions = {{{{0, storm::utility::one<ValueType>()}}},
{{{1, storm::utility::one<ValueType>()}}}};
std::vector<uint64_t> targetStates = {1};
uint64_t stateId = 2;
beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, stateId));
++stateId;
uint64_t nextId = beliefList.size();
uint64_t counter = 0;
statistics.underApproximationBuildTime.start();
// Expand the believes
visitedBelieves.insert(initialBeliefId);
beliefsToBeExpanded.push_back(initialBeliefId);
while (!beliefsToBeExpanded.empty()) {
//TODO think of other ways to stop exploration besides model size
auto currentBeliefId = beliefsToBeExpanded.front();
uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front());
// for targets, we only consider one action with one transition
if (beliefIsTarget[currentBeliefId]) {
// add a self-loop to target states
targetStates.push_back(beliefStateMap.left.at(currentBeliefId));
transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one<ValueType>()}}});
} else if (counter > maxModelSize) {
transitions.push_back({{{0, storm::utility::one<ValueType>()}}});
} else {
// Iterate over all actions and add the corresponding transitions
std::vector<std::map<uint64_t, ValueType>> actionTransitionStorage;
//TODO add a way to extract the actions from the over-approx and use them here?
for (uint64_t action = 0; action < numChoices; ++action) {
std::map<uint64_t, ValueType> transitionsInStateWithAction;
std::map<uint32_t, ValueType> observationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currentBeliefId], action);
for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) {
uint32_t observation = iter->first;
uint64_t nextBeliefId = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId],
action,
observation, nextId);
nextId = beliefList.size();
if (visitedBelieves.insert(nextBeliefId).second) {
beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId));
++stateId;
beliefsToBeExpanded.push_back(nextBeliefId);
++counter;
}
transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second;
}
actionTransitionStorage.push_back(transitionsInStateWithAction);
}
transitions.push_back(actionTransitionStorage);
}
beliefsToBeExpanded.pop_front();
if (storm::utility::resources::isTerminate()) {
statistics.underApproximationBuildAborted = true;
break;
}
}
statistics.underApproximationStates = transitions.size();
if (storm::utility::resources::isTerminate()) {
statistics.underApproximationBuildTime.stop();
return nullptr;
}
template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager) {
storm::models::sparse::StateLabeling labeling(transitions.size());
labeling.addLabel("init");
labeling.addLabel("target");
labeling.addLabelToState("init", 0);
for (auto targetState : targetStates) {
labeling.addLabelToState("target", targetState);
}
std::shared_ptr<storm::models::sparse::Model<ValueType, RewardModelType>> model;
auto transitionMatrix = buildTransitionMatrix(transitions);
if (transitionMatrix.getRowCount() == transitionMatrix.getRowGroupCount()) {
transitionMatrix.makeRowGroupingTrivial();
}
storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(transitionMatrix, labeling);
storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(modelComponents);
if (computeRewards) {
storm::models::sparse::StandardRewardModel<ValueType> rewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
for (auto const &iter : beliefStateMap.left) {
auto currentBelief = beliefList[iter.first];
auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) {
// Add the reward
rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
currentBelief));
}
}
underApproxMdp.addRewardModel("std", rewardModel);
underApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
}
model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underApproxMdp);
model->printModelInformationToStream(std::cout);
statistics.underApproximationBuildTime.stop();
std::string propertyString;
if (computeRewards) {
propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
} else {
propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
}
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
statistics.underApproximationCheckTime.start();
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, false)));
statistics.underApproximationCheckTime.stop();
if (storm::utility::resources::isTerminate() && !res) {
return nullptr;
}
STORM_LOG_ASSERT(res, "Result does not exist.");
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true)));
auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)];
return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
std::set<uint32_t> const &targetObservations, bool min,
bool computeRewards, uint64_t maxModelSize, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds) {
// Build the belief MDP until enough states are explored.
//TODO think of other ways to stop exploration besides model size
statistics.underApproximationBuildTime.start();
storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
if (computeRewards) {
@@ -981,7 +745,7 @@ namespace storm {
if (storm::utility::abs<ValueType>(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
stopExploration = true;
explorer.setCurrentStateIsTruncated();
} else if (explorer.getCurrentNumberOfMdpStates() >= maxModelSize) {
} else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) {
stopExploration = true;
explorer.setCurrentStateIsTruncated();
}
@@ -1024,249 +788,22 @@ namespace storm {
explorer.finishExploration();
statistics.underApproximationBuildTime.stop();
STORM_PRINT("Under Approximation MDP build took " << statistics.underApproximationBuildTime << " seconds." << std::endl);
explorer.getExploredMdp()->printModelInformationToStream(std::cout);
statistics.underApproximationCheckTime.start();
explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
statistics.underApproximationCheckTime.stop();
STORM_PRINT("Time Underapproximation: " << statistics.underApproximationCheckTime << " seconds." << std::endl);
STORM_PRINT("Under-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{explorer.getComputedValueAtInitialState(), {}, {}});
}
template<typename ValueType, typename RewardModelType>
storm::storage::SparseMatrix<ValueType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> &transitions) {
uint_fast64_t currentRow = 0;
uint_fast64_t currentRowGroup = 0;
uint64_t nrColumns = transitions.size();
uint64_t nrRows = 0;
uint64_t nrEntries = 0;
for (auto const &actionTransitions : transitions) {
for (auto const &map : actionTransitions) {
nrEntries += map.size();
++nrRows;
}
}
storm::storage::SparseMatrixBuilder<ValueType> smb(nrRows, nrColumns, nrEntries, true, true);
for (auto const &actionTransitions : transitions) {
smb.newRowGroup(currentRow);
for (auto const &map : actionTransitions) {
for (auto const &transition : map) {
smb.addNextValue(currentRow, transition.first, transition.second);
}
++currentRow;
}
++currentRowGroup;
}
return smb.build();
}
template<typename ValueType, typename RewardModelType>
uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefIdInVector(
std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
std::map<uint64_t, ValueType> &probabilities) {
// TODO This one is quite slow
for (auto const &belief : grid) {
if (belief.observation == observation) {
bool same = true;
for (auto const &probEntry : belief.probabilities) {
if (probabilities.find(probEntry.first) == probabilities.end()) {
same = false;
break;
}
if (!cc.isEqual(probEntry.second, probabilities[probEntry.first])) {
same = false;
break;
}
}
if (same) {
return belief.id;
}
}
}
return -1;
}
template<typename ValueType, typename RewardModelType>
storm::pomdp::Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getInitialBelief(uint64_t id) {
STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
"POMDP contains more than one initial state");
STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
"POMDP does not contain an initial state");
std::map<uint64_t, ValueType> distribution;
uint32_t observation = 0;
for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
if (pomdp.getInitialStates()[state] == 1) {
distribution[state] = storm::utility::one<ValueType>();
observation = pomdp.getObservation(state);
break;
}
}
return storm::pomdp::Belief<ValueType>{id, observation, distribution};
}
template<typename ValueType, typename RewardModelType>
std::pair<std::vector<std::map<uint64_t, ValueType>>, std::vector<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeSubSimplexAndLambdas(
std::map<uint64_t, ValueType> &probabilities, uint64_t resolution, uint64_t nrStates) {
//TODO this can also be simplified using the sparse vector interpretation
// This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
// Variable names are based on the paper
std::vector<ValueType> x(nrStates);
std::vector<ValueType> v(nrStates);
std::vector<ValueType> d(nrStates);
auto convResolution = storm::utility::convertNumber<ValueType>(resolution);
for (size_t i = 0; i < nrStates; ++i) {
for (auto const &probEntry : probabilities) {
if (probEntry.first >= i) {
x[i] += convResolution * probEntry.second;
}
}
v[i] = storm::utility::floor(x[i]);
d[i] = x[i] - v[i];
}
auto p = storm::utility::vector::getSortedIndices(d);
std::vector<std::vector<ValueType>> qs(nrStates, std::vector<ValueType>(nrStates));
for (size_t i = 0; i < nrStates; ++i) {
if (i == 0) {
for (size_t j = 0; j < nrStates; ++j) {
qs[i][j] = v[j];
}
} else {
for (size_t j = 0; j < nrStates; ++j) {
if (j == p[i - 1]) {
qs[i][j] = qs[i - 1][j] + storm::utility::one<ValueType>();
} else {
qs[i][j] = qs[i - 1][j];
}
}
}
}
std::vector<std::map<uint64_t, ValueType>> subSimplex(nrStates);
for (size_t j = 0; j < nrStates; ++j) {
for (size_t i = 0; i < nrStates - 1; ++i) {
if (cc.isLess(storm::utility::zero<ValueType>(), qs[j][i] - qs[j][i + 1])) {
subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution;
}
}
if (cc.isLess(storm::utility::zero<ValueType>(), qs[j][nrStates - 1])) {
subSimplex[j][nrStates - 1] = qs[j][nrStates - 1] / convResolution;
}
}
std::vector<ValueType> lambdas(nrStates, storm::utility::zero<ValueType>());
auto sum = storm::utility::zero<ValueType>();
for (size_t i = 1; i < nrStates; ++i) {
lambdas[i] = d[p[i - 1]] - d[p[i]];
sum += d[p[i - 1]] - d[p[i]];
}
lambdas[0] = storm::utility::one<ValueType>() - sum;
return std::make_pair(subSimplex, lambdas);
}
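For reference, the removed computeSubSimplexAndLambdas helper above implements the Freudenthal triangulation from Lovejoy's paper. In the notation of that code (belief b over states 0..n-1, resolution N), it computes roughly the following (editor's summary, not text from the commit):

    x_i = N \sum_{j \geq i} b(j), \qquad v_i = \lfloor x_i \rfloor, \qquad d_i = x_i - v_i .

With p the permutation ordering the d_i decreasingly, the simplex vertices are q^{(0)} = v and q^{(k)}_j = q^{(k-1)}_j + [j = p_{k-1}], each mapped back to a belief via b_k(i) = (q^{(k)}_i - q^{(k)}_{i+1}) / N (and b_k(n-1) = q^{(k)}_{n-1} / N). The weights are

    \lambda_k = d_{p_{k-1}} - d_{p_k} \ (k \geq 1), \qquad \lambda_0 = 1 - \sum_{k \geq 1} \lambda_k ,

so that b = \sum_k \lambda_k b_k, i.e. the belief is expressed as a convex combination of grid beliefs.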
template<typename ValueType, typename RewardModelType>
std::map<uint32_t, ValueType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeObservationProbabilitiesAfterAction(
storm::pomdp::Belief<ValueType> &belief,
uint64_t actionIndex) {
std::map<uint32_t, ValueType> res;
// the id is not important here as we immediately discard the belief (very hacky, I don't like it either)
std::map<uint64_t, ValueType> postProbabilities;
for (auto const &probEntry : belief.probabilities) {
uint64_t state = probEntry.first;
auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
for (auto const &entry : row) {
if (entry.getValue() > 0) {
postProbabilities[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
}
}
}
for (auto const &probEntry : postProbabilities) {
uint32_t observation = pomdp.getObservation(probEntry.first);
if (res.count(observation) == 0) {
res[observation] = probEntry.second;
} else {
res[observation] += probEntry.second;
}
}
return res;
return std::make_shared<ExplorerType>(std::move(explorer));
}
template<typename ValueType, typename RewardModelType>
uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterActionAndObservation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<bool> &beliefIsTarget, std::set<uint32_t> const &targetObservations, storm::pomdp::Belief<ValueType> &belief, uint64_t actionIndex,
uint32_t observation, uint64_t id) {
std::map<uint64_t, ValueType> distributionAfter;
for (auto const &probEntry : belief.probabilities) {
uint64_t state = probEntry.first;
auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
for (auto const &entry : row) {
if (pomdp.getObservation(entry.getColumn()) == observation) {
distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
}
}
}
// We have to normalize the distribution
auto sum = storm::utility::zero<ValueType>();
for (auto const &entry : distributionAfter) {
sum += entry.second;
}
for (auto const &entry : distributionAfter) {
distributionAfter[entry.first] /= sum;
}
if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) {
auto res = getBeliefIdInVector(beliefList, observation, distributionAfter);
return res;
} else {
beliefList.push_back(storm::pomdp::Belief<ValueType>{id, observation, distributionAfter});
beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
return id;
}
template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
// TODO
}
template<typename ValueType, typename RewardModelType>
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief) {
auto result = storm::utility::zero<ValueType>();
for (auto const &probEntry : belief) {
result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
}
return result;
}
template<typename ValueType, typename RewardModelType>
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief) {
auto result = storm::utility::zero<ValueType>();
for (auto const &probEntry : belief.probabilities) {
result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
}
return result;
}
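For reference (illustrative only), both getRewardAfterAction overloads evaluate the expected immediate reward of action a in belief b,

    r(b, a) = sum_s b(s) * r(s, a)

with r(s, a) read from the POMDP's unique reward model.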
template
class ApproximatePOMDPModelchecker<double>;
#ifdef STORM_HAVE_CARL
template
class ApproximatePOMDPModelchecker<storm::RationalNumber>;
template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>;
template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;
#endif
} }
} }
} }

107
src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h

@ -4,6 +4,7 @@
#include "storm/utility/logging.h" #include "storm/utility/logging.h"
#include "storm-pomdp/storage/Belief.h" #include "storm-pomdp/storage/Belief.h"
#include "storm-pomdp/storage/BeliefManager.h" #include "storm-pomdp/storage/BeliefManager.h"
#include "storm-pomdp/builder/BeliefMdpExplorer.h"
#include <boost/bimap.hpp> #include <boost/bimap.hpp>
#include "storm/storage/jani/Property.h" #include "storm/storage/jani/Property.h"
@ -17,12 +18,6 @@ namespace storm {
namespace modelchecker { namespace modelchecker {
typedef boost::bimap<uint64_t, uint64_t> bsmap_type; typedef boost::bimap<uint64_t, uint64_t> bsmap_type;
template<class ValueType>
struct POMDPCheckResult {
ValueType overApproxValue;
ValueType underApproxValue;
};
/** /**
* Struct containing information which is supposed to be persistent over multiple refinement steps * Struct containing information which is supposed to be persistent over multiple refinement steps
* *
@ -49,9 +44,13 @@ namespace storm {
bsmap_type underApproxBeliefStateMap; bsmap_type underApproxBeliefStateMap;
}; };
template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
template<typename PomdpModelType, typename BeliefValueType = typename PomdpModelType::ValueType>
class ApproximatePOMDPModelchecker { class ApproximatePOMDPModelchecker {
public: public:
typedef typename PomdpModelType::ValueType ValueType;
typedef typename PomdpModelType::RewardModelType RewardModelType;
typedef storm::storage::BeliefManager<PomdpModelType, BeliefValueType> BeliefManagerType;
typedef storm::builder::BeliefMdpExplorer<PomdpModelType, BeliefValueType> ExplorerType;
struct Options { struct Options {
Options(); Options();
@ -63,85 +62,60 @@ namespace storm {
bool cacheSubsimplices; /// Enables caching of subsimplices bool cacheSubsimplices; /// Enables caching of subsimplices
}; };
ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp, Options options = Options());
struct Result {
Result(ValueType lower, ValueType upper);
ValueType lowerBound;
ValueType upperBound;
ValueType diff (bool relative = false) const;
};
ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options = Options());
std::unique_ptr<POMDPCheckResult<ValueType>> check(storm::logic::Formula const& formula);
Result check(storm::logic::Formula const& formula);
void printStatisticsToStream(std::ostream& stream) const; void printStatisticsToStream(std::ostream& stream) const;
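A minimal usage sketch of the reworked public interface (illustrative only; pomdp and formula are assumed to be available in the calling code and are not part of this commit):

    // Sketch: 'pomdp' and 'formula' are assumed to exist in the caller.
    storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>> checker(pomdp);
    auto result = checker.check(formula);  // returns lower and upper bounds on the value
    std::cout << "Value in [" << result.lowerBound << ", " << result.upperBound << "], gap " << result.diff() << std::endl;
    checker.printStatisticsToStream(std::cout);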
private: private:
/** /**
* Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop
* Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size
* *
* @param targetObservations the set of observations to be reached
* @param min true if minimum probability is to be computed
* @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
* @param targetObservations set of target observations
* @param min true if minimum value is to be computed
* @param observationResolutionVector vector containing the resolution to be used for each observation
* @param computeRewards true if rewards are to be computed, false if probability is computed
* @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
* @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
* @param maxUaModelSize the maximum size of the underapproximation model to be generated
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/ */
std::unique_ptr<POMDPCheckResult<ValueType>>
refineReachability(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards);
void computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result);
/** /**
* Compute the reachability probability of given target observations on a POMDP for the given resolution only.
* On-the-fly state space generation is used for the overapproximation
* Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop
* *
* @param targetObservations the set of observations to be reached * @param targetObservations the set of observations to be reached
* @param min true if minimum probability is to be computed * @param min true if minimum probability is to be computed
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
* @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/ */
std::unique_ptr<POMDPCheckResult<ValueType>>
computeReachabilityProbabilityOTF(std::set<uint32_t> const &targetObservations, bool min);
void refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, Result& result);
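How check() dispatches between these two entry points is only visible in .cpp hunks not shown on this page; the following sketch is an assumption (the refine flag and the two bound vectors are placeholder names, not taken from this commit):

    // Sketch under assumptions: 'refine' and the bound vectors are hypothetical names.
    Result result(storm::utility::zero<ValueType>(), storm::utility::one<ValueType>());  // probability case
    if (refine) {
        refineReachability(targetObservations, min, boost::none, lowerPomdpValueBounds, upperPomdpValueBounds, result);
    } else {
        computeReachabilityOTF(targetObservations, min, boost::none, lowerPomdpValueBounds, upperPomdpValueBounds, result);
    }
    STORM_LOG_INFO("Result in [" << result.lowerBound << ", " << result.upperBound << "]");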
/** /**
* Compute the reachability rewards for given target observations on a POMDP for the given resolution only.
* On-the-fly state space generation is used for the overapproximation
*
* @param targetObservations the set of observations to be reached
* @param min true if minimum rewards are to be computed
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
* Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties
*/ */
std::unique_ptr<POMDPCheckResult<ValueType>>
computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min);
std::shared_ptr<ExplorerType> computeOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager);
void refineOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation);
private:
/** /**
* Helper method to compute the initial step of the refinement loop
*
* @param targetObservations set of target observations
* @param min true if minimum value is to be computed
* @param observationResolutionVector vector containing the resolution to be used for each observation
* @param computeRewards true if rewards are to be computed, false if probability is computed
* @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
* @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
* @param maxUaModelSize the maximum size of the underapproximation model to be generated
* @return struct containing components generated during the computation to be used in later refinement iterations
* Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties
*/ */
std::shared_ptr<RefinementComponents<ValueType>>
computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
std::shared_ptr<ExplorerType> computeUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager);
std::shared_ptr<RefinementComponents<ValueType>>
computeRefinementStep(std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
bool computeRewards, std::shared_ptr<RefinementComponents<ValueType>> refinementComponents,
std::set<uint32_t> changedObservations,
boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
void refineUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation);
/**
* Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size
*
* @param targetObservations set of target observations
* @param min true if minimum value is to be computed
* @param observationResolutionVector vector containing the resolution to be used for each observation
* @param computeRewards true if rewards are to be computed, false if probability is computed
* @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value
* @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value
* @param maxUaModelSize the maximum size of the underapproximation model to be generated
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/
std::unique_ptr<POMDPCheckResult<ValueType>>
computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards,
std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200);
#ifdef REMOVE_THIS
/** /**
* Helper to compute an underapproximation of the reachability property. * Helper to compute an underapproximation of the reachability property.
* The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states.
@ -243,7 +217,8 @@ namespace storm {
*/ */
ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief); ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief);
ValueType getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief); ValueType getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief);
#endif //REMOVE_THIS
struct Statistics { struct Statistics {
Statistics(); Statistics();
boost::optional<uint64_t> refinementSteps; boost::optional<uint64_t> refinementSteps;
@ -262,7 +237,7 @@ namespace storm {
}; };
Statistics statistics; Statistics statistics;
storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp;
PomdpModelType const& pomdp;
Options options; Options options;
storm::utility::ConstantsComparator<ValueType> cc; storm::utility::ConstantsComparator<ValueType> cc;
}; };
