49 changed files with 4474 additions and 279 deletions
-
71src/storm-pomdp-cli/settings/PomdpSettings.cpp
-
13src/storm-pomdp-cli/settings/PomdpSettings.h
-
180src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp
-
77src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h
-
41src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp
-
9src/storm-pomdp-cli/settings/modules/POMDPSettings.h
-
508src/storm-pomdp-cli/storm-pomdp.cpp
-
2src/storm-pomdp/CMakeLists.txt
-
59src/storm-pomdp/analysis/FiniteBeliefMdpDetection.h
-
181src/storm-pomdp/analysis/FormulaInformation.cpp
-
69src/storm-pomdp/analysis/FormulaInformation.h
-
205src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp
-
75src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h
-
80src/storm-pomdp/analysis/QualitativeAnalysis.cpp
-
186src/storm-pomdp/analysis/QualitativeStrategySearchNaive.cpp
-
74src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h
-
4src/storm-pomdp/analysis/UniqueObservationStates.cpp
-
840src/storm-pomdp/builder/BeliefMdpExplorer.h
-
45src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h
-
839src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
-
121src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
-
207src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h
-
12src/storm-pomdp/storage/Belief.h
-
507src/storm-pomdp/storage/BeliefManager.h
-
3src/storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.cpp
-
2src/storm-pomdp/transformer/BinaryPomdpTransformer.cpp
-
3src/storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.cpp
-
3src/storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.cpp
-
123src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp
-
17src/storm-pomdp/transformer/KnownProbabilityTransformer.h
-
18src/storm-pomdp/transformer/MakePOMDPCanonic.cpp
-
3src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp
-
4src/storm/adapters/MathsatExpressionAdapter.h
-
3src/storm/adapters/Z3ExpressionAdapter.cpp
-
11src/storm/models/sparse/Model.cpp
-
11src/storm/models/sparse/Model.h
-
37src/storm/models/sparse/Pomdp.cpp
-
16src/storm/models/sparse/Pomdp.h
-
1src/storm/settings/SettingsManager.cpp
-
35src/storm/solver/MathsatSmtSolver.cpp
-
6src/storm/solver/MathsatSmtSolver.h
-
4src/storm/solver/SmtSolver.h
-
5src/storm/solver/SmtlibSmtSolver.cpp
-
2src/storm/solver/SmtlibSmtSolver.h
-
13src/storm/solver/Z3SmtSolver.cpp
-
1src/storm/solver/Z3SmtSolver.h
-
12src/storm/storage/Distribution.cpp
-
5src/storm/storage/Distribution.h
-
10src/storm/utility/vector.h
@ -0,0 +1,71 @@ |
|||
#include "storm-pomdp-cli/settings/PomdpSettings.h"
|
|||
|
|||
#include "storm/settings/SettingsManager.h"
|
|||
|
|||
#include "storm/settings/modules/GeneralSettings.h"
|
|||
#include "storm/settings/modules/CoreSettings.h"
|
|||
#include "storm/settings/modules/IOSettings.h"
|
|||
#include "storm/settings/modules/DebugSettings.h"
|
|||
#include "storm/settings/modules/CuddSettings.h"
|
|||
#include "storm/settings/modules/SylvanSettings.h"
|
|||
#include "storm/settings/modules/EigenEquationSolverSettings.h"
|
|||
#include "storm/settings/modules/GmmxxEquationSolverSettings.h"
|
|||
#include "storm/settings/modules/NativeEquationSolverSettings.h"
|
|||
#include "storm/settings/modules/EliminationSettings.h"
|
|||
#include "storm/settings/modules/MinMaxEquationSolverSettings.h"
|
|||
#include "storm/settings/modules/GameSolverSettings.h"
|
|||
#include "storm/settings/modules/BisimulationSettings.h"
|
|||
#include "storm/settings/modules/GlpkSettings.h"
|
|||
#include "storm/settings/modules/GurobiSettings.h"
|
|||
#include "storm/settings/modules/Smt2SmtSolverSettings.h"
|
|||
#include "storm/settings/modules/ExplorationSettings.h"
|
|||
#include "storm/settings/modules/ResourceSettings.h"
|
|||
#include "storm/settings/modules/AbstractionSettings.h"
|
|||
#include "storm/settings/modules/BuildSettings.h"
|
|||
#include "storm/settings/modules/JitBuilderSettings.h"
|
|||
#include "storm/settings/modules/TopologicalEquationSolverSettings.h"
|
|||
#include "storm/settings/modules/ModelCheckerSettings.h"
|
|||
#include "storm/settings/modules/MultiplierSettings.h"
|
|||
#include "storm/settings/modules/TransformationSettings.h"
|
|||
#include "storm/settings/modules/MultiObjectiveSettings.h"
|
|||
#include "storm/settings/modules/HintSettings.h"
|
|||
#include "storm/settings/modules/OviSolverSettings.h"
|
|||
|
|||
#include "storm-pomdp-cli/settings/modules/POMDPSettings.h"
|
|||
#include "storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h"
|
|||
|
|||
namespace storm { |
|||
namespace settings { |
|||
void initializePomdpSettings(std::string const& name, std::string const& executableName) { |
|||
storm::settings::mutableManager().setName(name, executableName); |
|||
|
|||
storm::settings::addModule<storm::settings::modules::GeneralSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::IOSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::CoreSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::DebugSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::BuildSettings>(); |
|||
|
|||
storm::settings::addModule<storm::settings::modules::POMDPSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::BeliefExplorationSettings>(); |
|||
|
|||
storm::settings::addModule<storm::settings::modules::TransformationSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::GmmxxEquationSolverSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::EigenEquationSolverSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::NativeEquationSolverSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::EliminationSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::MinMaxEquationSolverSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::GameSolverSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::BisimulationSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::GlpkSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::ExplorationSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::ResourceSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::JitBuilderSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::TopologicalEquationSolverSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::ModelCheckerSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::MultiplierSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::HintSettings>(); |
|||
storm::settings::addModule<storm::settings::modules::OviSolverSettings>(); |
|||
} |
|||
} |
|||
} |
|||
|
|||
@ -0,0 +1,13 @@ |
|||
#pragma once |
|||
|
|||
#include <string> |
|||
|
|||
namespace storm { |
|||
namespace settings { |
|||
/*! |
|||
* Initialize the settings manager. |
|||
*/ |
|||
void initializePomdpSettings(std::string const& name, std::string const& executableName); |
|||
|
|||
} |
|||
} |
|||
@ -0,0 +1,180 @@ |
|||
#include "storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h"
|
|||
|
|||
#include "storm/settings/SettingsManager.h"
|
|||
#include "storm/settings/SettingMemento.h"
|
|||
#include "storm/settings/Option.h"
|
|||
#include "storm/settings/OptionBuilder.h"
|
|||
#include "storm/settings/ArgumentBuilder.h"
|
|||
|
|||
#include "storm/utility/NumberTraits.h"
|
|||
#include "storm/adapters/RationalNumberAdapter.h"
|
|||
#include "storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h"
|
|||
|
|||
#include "storm/exceptions/InvalidArgumentException.h"
|
|||
|
|||
|
|||
namespace storm { |
|||
namespace settings { |
|||
namespace modules { |
|||
|
|||
const std::string BeliefExplorationSettings::moduleName = "belexpl"; |
|||
|
|||
const std::string refineOption = "refine"; |
|||
const std::string explorationTimeLimitOption = "exploration-time"; |
|||
const std::string resolutionOption = "resolution"; |
|||
const std::string sizeThresholdOption = "size-threshold"; |
|||
const std::string gapThresholdOption = "gap-threshold"; |
|||
const std::string schedulerThresholdOption = "scheduler-threshold"; |
|||
const std::string observationThresholdOption = "obs-threshold"; |
|||
const std::string numericPrecisionOption = "numeric-precision"; |
|||
const std::string triangulationModeOption = "triangulationmode"; |
|||
|
|||
BeliefExplorationSettings::BeliefExplorationSettings() : ModuleSettings(moduleName) { |
|||
|
|||
this->addOption(storm::settings::OptionBuilder(moduleName, refineOption, false,"Refines the result bounds until reaching either the goal precision or the refinement step limit").addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("prec","The goal precision.").setDefaultValueDouble(1e-4).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0.0)).build()).addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("steps","The number of allowed refinement steps (0 means no limit).").setDefaultValueUnsignedInteger(0).makeOptional().build()).build()); |
|||
|
|||
this->addOption(storm::settings::OptionBuilder(moduleName, explorationTimeLimitOption, false, "Sets after which time no further states shall be explored.").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("time","In seconds.").build()).build()); |
|||
|
|||
this->addOption(storm::settings::OptionBuilder(moduleName, resolutionOption, false,"Sets the resolution of the discretization and how it is increased in case of refinement").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("init","the initial resolution (higher means more precise)").setDefaultValueUnsignedInteger(3).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the resolution of refined observations (higher means more precise).").setDefaultValueDouble(2).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterValidator(1)).build()).build()); |
|||
|
|||
this->addOption(storm::settings::OptionBuilder(moduleName, observationThresholdOption, false,"Only observations whose score is below this threshold will be refined.").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("init","initial threshold (higher means more precise").setDefaultValueDouble(0.1).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Controlls how fast the threshold is increased in each refinement step (higher means more precise).").setDefaultValueDouble(0.1).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); |
|||
|
|||
this->addOption(storm::settings::OptionBuilder(moduleName, sizeThresholdOption, false,"Sets how many new states are explored or rewired in a refinement step and how this value is increased in case of refinement.").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("init","initial limit (higher means more precise, 0 means automatic choice)").setDefaultValueUnsignedInteger(0).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Before each step the new threshold is set to the current state count times this number (higher means more precise).").setDefaultValueDouble(4).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(1)).build()).build()); |
|||
|
|||
this->addOption(storm::settings::OptionBuilder(moduleName, gapThresholdOption, false,"Sets how large the gap between known lower- and upper bounds at a beliefstate needs to be in order to explore").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("init","initial threshold (higher means less precise").setDefaultValueDouble(0.1).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the gap in each refinement step (higher means less precise).").setDefaultValueDouble(0.25).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); |
|||
|
|||
this->addOption(storm::settings::OptionBuilder(moduleName, schedulerThresholdOption, false,"Sets how much worse a sub-optimal choice can be in order to be included in the relevant explored fragment").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("init","initial threshold (higher means more precise").setDefaultValueDouble(1e-3).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the threshold in each refinement step (higher means more precise).").setDefaultValueDouble(1).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(1)).build()).build()); |
|||
|
|||
this->addOption(storm::settings::OptionBuilder(moduleName, numericPrecisionOption, false,"Sets the precision used to determine whether two belief-states are equal.").setIsAdvanced().addArgument( |
|||
storm::settings::ArgumentBuilder::createDoubleArgument("value","the precision").setDefaultValueDouble(1e-9).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0, 1)).build()).build()); |
|||
|
|||
this->addOption(storm::settings::OptionBuilder(moduleName, triangulationModeOption, false,"Sets how to triangulate beliefs when discretizing.").setIsAdvanced().addArgument( |
|||
storm::settings::ArgumentBuilder::createStringArgument("value","the triangulation mode").setDefaultValueString("dynamic").addValidatorString(storm::settings::ArgumentValidatorFactory::createMultipleChoiceValidator({"dynamic", "static"})).build()).build()); |
|||
} |
|||
|
|||
bool BeliefExplorationSettings::isRefineSet() const { |
|||
return this->getOption(refineOption).getHasOptionBeenSet(); |
|||
} |
|||
|
|||
double BeliefExplorationSettings::getRefinePrecision() const { |
|||
return this->getOption(refineOption).getArgumentByName("prec").getValueAsDouble(); |
|||
} |
|||
|
|||
bool BeliefExplorationSettings::isRefineStepLimitSet() const { |
|||
return this->getOption(refineOption).getArgumentByName("steps").getValueAsUnsignedInteger() != 0; |
|||
} |
|||
|
|||
uint64_t BeliefExplorationSettings::getRefineStepLimit() const { |
|||
assert(isRefineStepLimitSet()); |
|||
return this->getOption(refineOption).getArgumentByName("steps").getValueAsUnsignedInteger(); |
|||
} |
|||
|
|||
bool BeliefExplorationSettings::isExplorationTimeLimitSet() const { |
|||
return this->getOption(explorationTimeLimitOption).getHasOptionBeenSet(); |
|||
} |
|||
|
|||
uint64_t BeliefExplorationSettings::getExplorationTimeLimit() const { |
|||
return this->getOption(explorationTimeLimitOption).getArgumentByName("time").getValueAsUnsignedInteger(); |
|||
} |
|||
|
|||
uint64_t BeliefExplorationSettings::getResolutionInit() const { |
|||
return this->getOption(resolutionOption).getArgumentByName("init").getValueAsUnsignedInteger(); |
|||
} |
|||
|
|||
double BeliefExplorationSettings::getResolutionFactor() const { |
|||
return this->getOption(resolutionOption).getArgumentByName("factor").getValueAsDouble(); |
|||
} |
|||
|
|||
uint64_t BeliefExplorationSettings::getSizeThresholdInit() const { |
|||
return this->getOption(sizeThresholdOption).getArgumentByName("init").getValueAsUnsignedInteger(); |
|||
} |
|||
|
|||
double BeliefExplorationSettings::getSizeThresholdFactor() const { |
|||
return this->getOption(sizeThresholdOption).getArgumentByName("factor").getValueAsDouble(); |
|||
} |
|||
|
|||
double BeliefExplorationSettings::getGapThresholdInit() const { |
|||
return this->getOption(gapThresholdOption).getArgumentByName("init").getValueAsDouble(); |
|||
} |
|||
|
|||
double BeliefExplorationSettings::getGapThresholdFactor() const { |
|||
return this->getOption(gapThresholdOption).getArgumentByName("factor").getValueAsDouble(); |
|||
} |
|||
|
|||
double BeliefExplorationSettings::getOptimalChoiceValueThresholdInit() const { |
|||
return this->getOption(schedulerThresholdOption).getArgumentByName("init").getValueAsDouble(); |
|||
} |
|||
|
|||
double BeliefExplorationSettings::getOptimalChoiceValueThresholdFactor() const { |
|||
return this->getOption(schedulerThresholdOption).getArgumentByName("factor").getValueAsDouble(); |
|||
} |
|||
|
|||
double BeliefExplorationSettings::getObservationScoreThresholdInit() const { |
|||
return this->getOption(observationThresholdOption).getArgumentByName("init").getValueAsDouble(); |
|||
} |
|||
|
|||
double BeliefExplorationSettings::getObservationScoreThresholdFactor() const { |
|||
return this->getOption(observationThresholdOption).getArgumentByName("factor").getValueAsDouble(); |
|||
} |
|||
|
|||
bool BeliefExplorationSettings::isNumericPrecisionSetFromDefault() const { |
|||
return !this->getOption(numericPrecisionOption).getHasOptionBeenSet() || this->getOption(numericPrecisionOption).getArgumentByName("value").wasSetFromDefaultValue(); |
|||
} |
|||
|
|||
double BeliefExplorationSettings::getNumericPrecision() const { |
|||
return this->getOption(numericPrecisionOption).getArgumentByName("value").getValueAsDouble(); |
|||
} |
|||
|
|||
bool BeliefExplorationSettings::isDynamicTriangulationModeSet() const { |
|||
return this->getOption(triangulationModeOption).getArgumentByName("value").getValueAsString() == "dynamic"; |
|||
|
|||
} |
|||
bool BeliefExplorationSettings::isStaticTriangulationModeSet() const { |
|||
return this->getOption(triangulationModeOption).getArgumentByName("value").getValueAsString() == "static"; |
|||
} |
|||
|
|||
template<typename ValueType> |
|||
void BeliefExplorationSettings::setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions<ValueType>& options) const { |
|||
options.refine = isRefineSet(); |
|||
options.refinePrecision = getRefinePrecision(); |
|||
if (isRefineStepLimitSet()) { |
|||
options.refineStepLimit = getRefineStepLimit(); |
|||
} else { |
|||
options.refineStepLimit = boost::none; |
|||
} |
|||
if (isExplorationTimeLimitSet()) { |
|||
options.explorationTimeLimit = getExplorationTimeLimit(); |
|||
} else { |
|||
options.explorationTimeLimit = boost::none; |
|||
} |
|||
options.resolutionInit = getResolutionInit(); |
|||
options.resolutionFactor = storm::utility::convertNumber<ValueType>(getResolutionFactor()); |
|||
options.sizeThresholdInit = getSizeThresholdInit(); |
|||
options.sizeThresholdFactor = storm::utility::convertNumber<ValueType>(getSizeThresholdFactor()); |
|||
options.gapThresholdInit = storm::utility::convertNumber<ValueType>(getGapThresholdInit()); |
|||
options.gapThresholdFactor = storm::utility::convertNumber<ValueType>(getGapThresholdFactor()); |
|||
options.optimalChoiceValueThresholdInit = storm::utility::convertNumber<ValueType>(getOptimalChoiceValueThresholdInit()); |
|||
options.optimalChoiceValueThresholdFactor = storm::utility::convertNumber<ValueType>(getOptimalChoiceValueThresholdFactor()); |
|||
options.obsThresholdInit = storm::utility::convertNumber<ValueType>(getObservationScoreThresholdInit()); |
|||
options.obsThresholdIncrementFactor = storm::utility::convertNumber<ValueType>(getObservationScoreThresholdFactor()); |
|||
|
|||
options.numericPrecision = getNumericPrecision(); |
|||
if (storm::NumberTraits<ValueType>::IsExact) { |
|||
if (isNumericPrecisionSetFromDefault()) { |
|||
STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmethic is used."); |
|||
options.numericPrecision = storm::utility::zero<ValueType>(); |
|||
} else { |
|||
STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "A non-zero numeric precision was set although exact arithmethic is used. Results might be inexact."); |
|||
} |
|||
} |
|||
options.dynamicTriangulation = isDynamicTriangulationModeSet(); |
|||
} |
|||
|
|||
template void BeliefExplorationSettings::setValuesInOptionsStruct<double>(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions<double>& options) const; |
|||
template void BeliefExplorationSettings::setValuesInOptionsStruct<storm::RationalNumber>(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions<storm::RationalNumber>& options) const; |
|||
|
|||
|
|||
|
|||
} // namespace modules
|
|||
} // namespace settings
|
|||
} // namespace storm
|
|||
@ -0,0 +1,77 @@ |
|||
#pragma once |
|||
|
|||
#include "storm-config.h" |
|||
#include "storm/settings/modules/ModuleSettings.h" |
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
namespace modelchecker { |
|||
template<typename ValueType> |
|||
struct ApproximatePOMDPModelCheckerOptions; |
|||
} |
|||
} |
|||
|
|||
namespace settings { |
|||
namespace modules { |
|||
|
|||
/*! |
|||
* This class represents the settings for POMDP model checking. |
|||
*/ |
|||
class BeliefExplorationSettings : public ModuleSettings { |
|||
public: |
|||
|
|||
/*! |
|||
* Creates a new set of POMDP settings. |
|||
*/ |
|||
BeliefExplorationSettings(); |
|||
|
|||
virtual ~BeliefExplorationSettings() = default; |
|||
|
|||
bool isRefineSet() const; |
|||
double getRefinePrecision() const; |
|||
bool isRefineStepLimitSet() const; |
|||
uint64_t getRefineStepLimit() const; |
|||
|
|||
bool isExplorationTimeLimitSet() const; |
|||
uint64_t getExplorationTimeLimit() const; |
|||
|
|||
/// Discretization Resolution |
|||
uint64_t getResolutionInit() const; |
|||
double getResolutionFactor() const; |
|||
/// The maximal number of newly expanded MDP states in a refinement step |
|||
uint64_t getSizeThresholdInit() const; |
|||
double getSizeThresholdFactor() const; |
|||
|
|||
/// Controls how large the gap between known lower- and upper bounds at a beliefstate needs to be in order to explore |
|||
double getGapThresholdInit() const; |
|||
double getGapThresholdFactor() const; |
|||
|
|||
/// Controls whether "almost optimal" choices will be considered optimal |
|||
double getOptimalChoiceValueThresholdInit() const; |
|||
double getOptimalChoiceValueThresholdFactor() const; |
|||
|
|||
/// Controls which observations are refined. |
|||
double getObservationScoreThresholdInit() const; |
|||
double getObservationScoreThresholdFactor() const; |
|||
|
|||
/// Used to determine whether two beliefs are equal |
|||
bool isNumericPrecisionSetFromDefault() const; |
|||
double getNumericPrecision() const; |
|||
|
|||
bool isDynamicTriangulationModeSet() const; |
|||
bool isStaticTriangulationModeSet() const; |
|||
|
|||
template<typename ValueType> |
|||
void setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions<ValueType>& options) const; |
|||
|
|||
// The name of the module. |
|||
static const std::string moduleName; |
|||
|
|||
private: |
|||
|
|||
|
|||
}; |
|||
|
|||
} // namespace modules |
|||
} // namespace settings |
|||
} // namespace storm |
|||
@ -0,0 +1,59 @@ |
|||
#pragma once |
|||
|
|||
#include <boost/optional.hpp> |
|||
|
|||
#include "storm/models/sparse/Pomdp.h" |
|||
#include "storm/storage/BitVector.h" |
|||
#include "storm/storage/StronglyConnectedComponentDecomposition.h" |
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
|
|||
/*! |
|||
* This method tries to detect that the beliefmdp is finite. |
|||
* If this returns true, the beliefmdp is certainly finite. |
|||
* However, if this returns false, the beliefmdp might still be finite |
|||
* It is assumed that the belief MDP is not further explored when reaching a targetstate |
|||
*/ |
|||
template<typename ValueType> |
|||
bool detectFiniteBeliefMdp(storm::models::sparse::Pomdp<ValueType> const& pomdp, boost::optional<storm::storage::BitVector> const& targetStates) { |
|||
// All infinite paths of the POMDP (including the ones with prob. 0 ) either |
|||
// - reach a target state after finitely many steps or |
|||
// - after finitely many steps enter an SCC and do not leave it |
|||
// Hence, any path of the belief MDP will at some point either reach a target state or stay in a set of POMDP SCCs. |
|||
// Only in the latter case we can get infinitely many different belief states. |
|||
// Below, we check whether all SCCs only consist of Dirac distributions. |
|||
// If this is the case, no new belief states will be found at some point. |
|||
|
|||
// Get the SCC decomposition |
|||
storm::storage::StronglyConnectedComponentDecompositionOptions options; |
|||
options.dropNaiveSccs(); |
|||
storm::storage::BitVector relevantStates; |
|||
if (targetStates) { |
|||
relevantStates = ~targetStates.get(); |
|||
options.subsystem(&relevantStates); |
|||
} |
|||
storm::storage::StronglyConnectedComponentDecomposition<ValueType> sccs(pomdp.getTransitionMatrix(), options); |
|||
|
|||
// Check whether all choices that stay within an SCC have Dirac distributions |
|||
for (auto const& scc : sccs) { |
|||
for (auto const& sccState : scc) { |
|||
for (uint64_t rowIndex = pomdp.getNondeterministicChoiceIndices()[sccState]; rowIndex < pomdp.getNondeterministicChoiceIndices()[sccState + 1]; ++rowIndex) { |
|||
for (auto const& entry : pomdp.getTransitionMatrix().getRow(rowIndex)) { |
|||
if (!storm::utility::isOne(entry.getValue()) && !storm::utility::isZero(entry.getValue())) { |
|||
if (scc.containsState(entry.getColumn())) { |
|||
// There is a non-dirac choice that stays in the SCC. |
|||
// This could still mean that the belief MDP is finite |
|||
// e.g., if at some point the branches merge back to the same state |
|||
return false; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
return true; |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,181 @@ |
|||
#include "storm-pomdp/analysis/FormulaInformation.h"
|
|||
#include "storm/logic/Formulas.h"
|
|||
#include "storm/logic/FragmentSpecification.h"
|
|||
#include "storm/models/sparse/Pomdp.h"
|
|||
#include "storm/models/sparse/StandardRewardModel.h"
|
|||
#include "storm/modelchecker/propositional/SparsePropositionalModelChecker.h"
|
|||
#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
|
|||
|
|||
#include "storm/utility/macros.h"
|
|||
#include "storm/exceptions/InvalidPropertyException.h"
|
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
namespace analysis { |
|||
|
|||
bool FormulaInformation::StateSet::empty() const { |
|||
STORM_LOG_ASSERT(states.empty() == observations.empty(), "Inconsistent StateSet."); |
|||
return observations.empty(); |
|||
} |
|||
|
|||
FormulaInformation::FormulaInformation() : type(Type::Unsupported) { |
|||
// Intentionally left empty
|
|||
} |
|||
|
|||
FormulaInformation::FormulaInformation(Type const& type, storm::solver::OptimizationDirection const& dir, boost::optional<std::string> const& rewardModelName) : type(type), optimizationDirection(dir), rewardModelName(rewardModelName) { |
|||
STORM_LOG_ASSERT(!this->rewardModelName.is_initialized() || this->type == Type::NonNestedExpectedRewardFormula, "Got a reward model name for a non-reward formula."); |
|||
|
|||
} |
|||
|
|||
FormulaInformation::Type const& FormulaInformation::getType() const { |
|||
return type; |
|||
} |
|||
|
|||
bool FormulaInformation::isNonNestedReachabilityProbability() const { |
|||
return type == Type::NonNestedReachabilityProbability; |
|||
} |
|||
|
|||
bool FormulaInformation::isNonNestedExpectedRewardFormula() const { |
|||
return type == Type::NonNestedExpectedRewardFormula; |
|||
} |
|||
|
|||
bool FormulaInformation::isUnsupported() const { |
|||
return type == Type::Unsupported; |
|||
} |
|||
|
|||
typename FormulaInformation::StateSet const& FormulaInformation::getTargetStates() const { |
|||
STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula || this->type == Type::NonNestedReachabilityProbability, "Target states requested for unexpected formula type."); |
|||
return targetStates.get(); |
|||
} |
|||
|
|||
typename FormulaInformation::StateSet const& FormulaInformation::getSinkStates() const { |
|||
STORM_LOG_ASSERT(this->type == Type::NonNestedReachabilityProbability, "Sink states requested for unexpected formula type."); |
|||
return sinkStates.get(); |
|||
} |
|||
|
|||
std::string const& FormulaInformation::getRewardModelName() const { |
|||
STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula, "Reward model requested for unexpected formula type."); |
|||
return rewardModelName.get(); |
|||
} |
|||
|
|||
storm::solver::OptimizationDirection const& FormulaInformation::getOptimizationDirection() const { |
|||
return optimizationDirection; |
|||
} |
|||
|
|||
bool FormulaInformation::minimize() const { |
|||
return storm::solver::minimize(optimizationDirection); |
|||
} |
|||
|
|||
bool FormulaInformation::maximize() const { |
|||
return storm::solver::maximize(optimizationDirection); |
|||
} |
|||
|
|||
template <typename PomdpType> |
|||
FormulaInformation::StateSet getStateSet(PomdpType const& pomdp, storm::storage::BitVector&& inputStates) { |
|||
FormulaInformation::StateSet result; |
|||
result.states = std::move(inputStates); |
|||
for (auto const& state : result.states) { |
|||
result.observations.insert(pomdp.getObservation(state)); |
|||
} |
|||
// check if this set is observation-closed, i.e., whether there is a state outside of this set with one of the observations collected above
|
|||
result.observationClosed = true; |
|||
for (uint64_t state = result.states.getNextUnsetIndex(0); state < result.states.size(); state = result.states.getNextUnsetIndex(state + 1)) { |
|||
if (result.observations.count(pomdp.getObservation(state)) > 0) { |
|||
result.observationClosed = false; |
|||
break; |
|||
} |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
template <typename PomdpType> |
|||
void FormulaInformation::updateTargetStates(PomdpType const& pomdp, storm::storage::BitVector&& newTargetStates) { |
|||
STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula || this->type == Type::NonNestedReachabilityProbability, "Target states updated for unexpected formula type."); |
|||
targetStates = getStateSet(pomdp, std::move(newTargetStates)); |
|||
} |
|||
|
|||
template <typename PomdpType> |
|||
void FormulaInformation::updateSinkStates(PomdpType const& pomdp, storm::storage::BitVector&& newSinkStates) { |
|||
STORM_LOG_ASSERT(this->type == Type::NonNestedReachabilityProbability, "Sink states requested for unexpected formula type."); |
|||
sinkStates = getStateSet(pomdp, std::move(newSinkStates)); |
|||
} |
|||
|
|||
template <typename PomdpType> |
|||
storm::storage::BitVector getStates(storm::logic::Formula const& propositionalFormula, bool formulaInverted, PomdpType const& pomdp) { |
|||
storm::modelchecker::SparsePropositionalModelChecker<PomdpType> mc(pomdp); |
|||
auto checkResult = mc.check(propositionalFormula); |
|||
storm::storage::BitVector resultBitVector(checkResult->asExplicitQualitativeCheckResult().getTruthValuesVector()); |
|||
if (formulaInverted) { |
|||
resultBitVector.complement(); |
|||
} |
|||
return resultBitVector; |
|||
} |
|||
|
|||
template <typename PomdpType> |
|||
FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::ProbabilityOperatorFormula const& formula) { |
|||
STORM_LOG_THROW(formula.hasOptimalityType(), storm::exceptions::InvalidPropertyException, "The property does not specify an optimization direction (min/max)"); |
|||
STORM_LOG_WARN_COND(!formula.hasBound(), "The probability threshold for the given property will be ignored."); |
|||
auto const& subformula = formula.getSubformula(); |
|||
std::shared_ptr<storm::logic::Formula const> targetStatesFormula, constraintsStatesFormula; |
|||
if (subformula.isEventuallyFormula()) { |
|||
targetStatesFormula = subformula.asEventuallyFormula().getSubformula().asSharedPointer(); |
|||
constraintsStatesFormula = storm::logic::Formula::getTrueFormula()->asSharedPointer(); |
|||
} else if (subformula.isUntilFormula()) { |
|||
storm::logic::UntilFormula const &untilFormula = subformula.asUntilFormula(); |
|||
targetStatesFormula = untilFormula.getRightSubformula().asSharedPointer(); |
|||
constraintsStatesFormula = untilFormula.getLeftSubformula().asSharedPointer(); |
|||
} |
|||
if (targetStatesFormula && targetStatesFormula->isInFragment(storm::logic::propositional()) && constraintsStatesFormula && constraintsStatesFormula->isInFragment(storm::logic::propositional())) { |
|||
FormulaInformation result(FormulaInformation::Type::NonNestedReachabilityProbability, formula.getOptimalityType()); |
|||
result.updateTargetStates(pomdp, getStates(*targetStatesFormula, false, pomdp)); |
|||
result.updateSinkStates(pomdp, getStates(*constraintsStatesFormula, true, pomdp)); |
|||
return result; |
|||
} |
|||
return FormulaInformation(); |
|||
} |
|||
|
|||
template <typename PomdpType> |
|||
FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::RewardOperatorFormula const& formula) { |
|||
STORM_LOG_THROW(formula.hasOptimalityType(), storm::exceptions::InvalidPropertyException, "The property does not specify an optimization direction (min/max)"); |
|||
STORM_LOG_WARN_COND(!formula.hasBound(), "The reward threshold for the given property will be ignored."); |
|||
std::string rewardModelName = ""; |
|||
if (formula.hasRewardModelName()) { |
|||
rewardModelName = formula.getRewardModelName(); |
|||
STORM_LOG_THROW(pomdp.hasRewardModel(rewardModelName), storm::exceptions::InvalidPropertyException, "Selected reward model with name '" << rewardModelName << "' does not exist."); |
|||
} else { |
|||
STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::InvalidPropertyException, "Reward operator formula does not specify a reward model and the reward model is not unique."); |
|||
rewardModelName = pomdp.getUniqueRewardModelName(); |
|||
} |
|||
auto const& subformula = formula.getSubformula(); |
|||
std::shared_ptr<storm::logic::Formula const> targetStatesFormula; |
|||
if (subformula.isEventuallyFormula()) { |
|||
targetStatesFormula = subformula.asEventuallyFormula().getSubformula().asSharedPointer(); |
|||
} |
|||
if (targetStatesFormula && targetStatesFormula->isInFragment(storm::logic::propositional())) { |
|||
FormulaInformation result(FormulaInformation::Type::NonNestedExpectedRewardFormula, formula.getOptimalityType(), rewardModelName); |
|||
result.updateTargetStates(pomdp, getStates(*targetStatesFormula, false, pomdp)); |
|||
return result; |
|||
} |
|||
return FormulaInformation(); |
|||
} |
|||
|
|||
template <typename PomdpType> |
|||
FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::Formula const& formula) { |
|||
if (formula.isProbabilityOperatorFormula()) { |
|||
return getFormulaInformation(pomdp, formula.asProbabilityOperatorFormula()); |
|||
} else if (formula.isRewardOperatorFormula()) { |
|||
return getFormulaInformation(pomdp, formula.asRewardOperatorFormula()); |
|||
} |
|||
return FormulaInformation(); |
|||
} |
|||
|
|||
template void FormulaInformation::updateTargetStates<storm::models::sparse::Pomdp<double>>(storm::models::sparse::Pomdp<double> const& pomdp, storm::storage::BitVector&& newTargetStates); |
|||
template void FormulaInformation::updateSinkStates<storm::models::sparse::Pomdp<double>>(storm::models::sparse::Pomdp<double> const& pomdp, storm::storage::BitVector&& newSinkStates); |
|||
template FormulaInformation getFormulaInformation<storm::models::sparse::Pomdp<double>>(storm::models::sparse::Pomdp<double> const& pomdp, storm::logic::Formula const& formula); |
|||
template void FormulaInformation::updateTargetStates<storm::models::sparse::Pomdp<storm::RationalNumber>>(storm::models::sparse::Pomdp<storm::RationalNumber> const& pomdp, storm::storage::BitVector&& newTargetStates); |
|||
template void FormulaInformation::updateSinkStates<storm::models::sparse::Pomdp<storm::RationalNumber>>(storm::models::sparse::Pomdp<storm::RationalNumber> const& pomdp, storm::storage::BitVector&& newSinkStates); |
|||
template FormulaInformation getFormulaInformation<storm::models::sparse::Pomdp<storm::RationalNumber>>(storm::models::sparse::Pomdp<storm::RationalNumber> const& pomdp, storm::logic::Formula const& formula); |
|||
|
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,69 @@ |
|||
#pragma once |
|||
|
|||
#include <set> |
|||
#include <string> |
|||
#include <boost/optional.hpp> |
|||
|
|||
#include "storm/storage/BitVector.h" |
|||
#include "storm/solver/OptimizationDirection.h" |
|||
|
|||
namespace storm { |
|||
|
|||
namespace logic { |
|||
class Formula; |
|||
} |
|||
|
|||
namespace pomdp { |
|||
namespace analysis { |
|||
class FormulaInformation { |
|||
public: |
|||
/// Characterizes a certain set of states |
|||
struct StateSet { |
|||
storm::storage::BitVector states; // The set of states |
|||
std::set<uint32_t> observations; // The set of the observations that are assigned to at least one state of the set |
|||
bool observationClosed; // True iff this state set can be uniquely characterized by the observations |
|||
bool empty() const; |
|||
}; |
|||
|
|||
/// Possible supported formula types |
|||
enum class Type { |
|||
NonNestedReachabilityProbability, // e.g. 'Pmax=? [F "target"]' or 'Pmin=? [!"sink" U "target"]' |
|||
NonNestedExpectedRewardFormula, // e.g. 'Rmin=? [F x>0 ]' |
|||
Unsupported // The formula type is unsupported |
|||
}; |
|||
|
|||
FormulaInformation(); // Unsupported |
|||
FormulaInformation(Type const& type, storm::solver::OptimizationDirection const& dir, boost::optional<std::string> const& rewardModelName = boost::none); |
|||
|
|||
Type const& getType() const; |
|||
bool isNonNestedReachabilityProbability() const; |
|||
bool isNonNestedExpectedRewardFormula() const; |
|||
bool isUnsupported() const; |
|||
StateSet const& getTargetStates() const; |
|||
StateSet const& getSinkStates() const; // Shall not be called for reward formulas |
|||
std::string const& getRewardModelName() const; // Shall not be called for probability formulas |
|||
storm::solver::OptimizationDirection const& getOptimizationDirection() const; |
|||
bool minimize() const; |
|||
bool maximize() const; |
|||
|
|||
template <typename PomdpType> |
|||
void updateTargetStates(PomdpType const& pomdp, storm::storage::BitVector&& newTargetStates); |
|||
|
|||
template <typename PomdpType> |
|||
void updateSinkStates(PomdpType const& pomdp, storm::storage::BitVector&& newSinkStates); |
|||
|
|||
private: |
|||
Type type; |
|||
storm::solver::OptimizationDirection optimizationDirection; |
|||
boost::optional<StateSet> targetStates; |
|||
boost::optional<StateSet> sinkStates; |
|||
boost::optional<std::string> rewardModelName; |
|||
}; |
|||
|
|||
template <typename PomdpType> |
|||
FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::Formula const& formula); |
|||
|
|||
|
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,205 @@ |
|||
#include "storm-pomdp/analysis/MemlessStrategySearchQualitative.h"
|
|||
|
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
|
|||
template <typename ValueType> |
|||
void MemlessStrategySearchQualitative<ValueType>::initialize(uint64_t k) { |
|||
if (maxK == std::numeric_limits<uint64_t>::max()) { |
|||
// not initialized at all.
|
|||
// Create some data structures.
|
|||
for(uint64_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { |
|||
actionSelectionVars.push_back(std::vector<storm::expressions::Variable>()); |
|||
actionSelectionVarExpressions.push_back(std::vector<storm::expressions::Expression>()); |
|||
statesPerObservation.push_back(std::vector<uint64_t>()); // Consider using bitvectors instead.
|
|||
} |
|||
|
|||
// Fill the states-per-observation mapping,
|
|||
// declare the reachability variables,
|
|||
// declare the path variables.
|
|||
uint64_t stateId = 0; |
|||
for(auto obs : pomdp.getObservations()) { |
|||
pathVars.push_back(std::vector<storm::expressions::Expression>()); |
|||
for (uint64_t i = 0; i < k; ++i) { |
|||
pathVars.back().push_back(expressionManager->declareBooleanVariable("P-"+std::to_string(stateId)+"-"+std::to_string(i)).getExpression()); |
|||
} |
|||
reachVars.push_back(expressionManager->declareBooleanVariable("C-" + std::to_string(stateId))); |
|||
reachVarExpressions.push_back(reachVars.back().getExpression()); |
|||
statesPerObservation.at(obs).push_back(stateId++); |
|||
} |
|||
assert(pathVars.size() == pomdp.getNumberOfStates()); |
|||
assert(reachVars.size() == pomdp.getNumberOfStates()); |
|||
assert(reachVarExpressions.size() == pomdp.getNumberOfStates()); |
|||
|
|||
// Create the action selection variables.
|
|||
uint64_t obs = 0; |
|||
for(auto const& statesForObservation : statesPerObservation) { |
|||
for (uint64_t a = 0; a < pomdp.getNumberOfChoices(statesForObservation.front()); ++a) { |
|||
std::string varName = "A-" + std::to_string(obs) + "-" + std::to_string(a); |
|||
actionSelectionVars.at(obs).push_back(expressionManager->declareBooleanVariable(varName)); |
|||
actionSelectionVarExpressions.at(obs).push_back(actionSelectionVars.at(obs).back().getExpression()); |
|||
} |
|||
++obs; |
|||
} |
|||
} else { |
|||
assert(false); |
|||
} |
|||
|
|||
uint64_t rowindex = 0; |
|||
for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { |
|||
if (targetStates.get(state)) { |
|||
smtSolver->add(pathVars[state][0]); |
|||
} else { |
|||
smtSolver->add(!pathVars[state][0]); |
|||
} |
|||
|
|||
if (surelyReachSinkStates.get(state)) { |
|||
smtSolver->add(!reachVarExpressions[state]); |
|||
} else if(!targetStates.get(state)) { |
|||
std::vector<std::vector<std::vector<storm::expressions::Expression>>> pathsubsubexprs; |
|||
for (uint64_t j = 1; j < k; ++j) { |
|||
pathsubsubexprs.push_back(std::vector<std::vector<storm::expressions::Expression>>()); |
|||
for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { |
|||
pathsubsubexprs.back().push_back(std::vector<storm::expressions::Expression>()); |
|||
} |
|||
} |
|||
|
|||
for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { |
|||
std::vector<storm::expressions::Expression> subexprreach; |
|||
|
|||
subexprreach.push_back(!reachVarExpressions.at(state)); |
|||
subexprreach.push_back(!actionSelectionVarExpressions.at(pomdp.getObservation(state)).at(action)); |
|||
for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { |
|||
subexprreach.push_back(reachVarExpressions.at(entries.getColumn())); |
|||
} |
|||
smtSolver->add(storm::expressions::disjunction(subexprreach)); |
|||
for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { |
|||
for (uint64_t j = 1; j < k; ++j) { |
|||
pathsubsubexprs[j - 1][action].push_back(pathVars[entries.getColumn()][j - 1]); |
|||
} |
|||
} |
|||
rowindex++; |
|||
} |
|||
smtSolver->add(storm::expressions::implies(reachVarExpressions.at(state), pathVars.at(state).back())); |
|||
|
|||
for (uint64_t j = 1; j < k; ++j) { |
|||
std::vector<storm::expressions::Expression> pathsubexprs; |
|||
|
|||
for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { |
|||
pathsubexprs.push_back(actionSelectionVarExpressions.at(pomdp.getObservation(state)).at(action) && storm::expressions::disjunction(pathsubsubexprs[j - 1][action])); |
|||
} |
|||
smtSolver->add(storm::expressions::iff(pathVars[state][j], storm::expressions::disjunction(pathsubexprs))); |
|||
} |
|||
} |
|||
} |
|||
|
|||
for (auto const& actionVars : actionSelectionVarExpressions) { |
|||
smtSolver->add(storm::expressions::disjunction(actionVars)); |
|||
} |
|||
} |
|||
|
|||
template <typename ValueType> |
|||
bool MemlessStrategySearchQualitative<ValueType>::analyze(uint64_t k, storm::storage::BitVector const& oneOfTheseStates, storm::storage::BitVector const& allOfTheseStates) { |
|||
if (k < maxK) { |
|||
initialize(k); |
|||
} |
|||
|
|||
std::vector<storm::expressions::Expression> atLeastOneOfStates; |
|||
|
|||
for (uint64_t state : oneOfTheseStates) { |
|||
STORM_LOG_ASSERT(reachVarExpressions.size() > state, "state id " << state << " exceeds number of states (" << reachVarExpressions.size() << ")" ); |
|||
atLeastOneOfStates.push_back(reachVarExpressions[state]); |
|||
} |
|||
assert(atLeastOneOfStates.size() > 0); |
|||
smtSolver->add(storm::expressions::disjunction(atLeastOneOfStates)); |
|||
|
|||
for (uint64_t state : allOfTheseStates) { |
|||
assert(reachVarExpressions.size() > state); |
|||
smtSolver->add(reachVarExpressions[state]); |
|||
} |
|||
|
|||
std::cout << smtSolver->getSmtLibString() << std::endl; |
|||
|
|||
|
|||
std::vector<std::set<uint64_t>> scheduler; |
|||
|
|||
while (true) { |
|||
|
|||
auto result = smtSolver->check(); |
|||
uint64_t i = 0; |
|||
|
|||
if (result == storm::solver::SmtSolver::CheckResult::Unknown) { |
|||
STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "SMT solver yielded an unexpected result"); |
|||
} else if (result == storm::solver::SmtSolver::CheckResult::Unsat) { |
|||
std::cout << std::endl << "Unsatisfiable!" << std::endl; |
|||
return false; |
|||
} |
|||
|
|||
std::cout << std::endl << "Satisfying assignment: " << std::endl << smtSolver->getModelAsValuation().toString(true) << std::endl; |
|||
auto model = smtSolver->getModel(); |
|||
std::cout << "states that are okay" << std::endl; |
|||
|
|||
|
|||
storm::storage::BitVector observations(pomdp.getNrObservations()); |
|||
storm::storage::BitVector remainingstates(pomdp.getNumberOfStates()); |
|||
for (auto rv : reachVars) { |
|||
if (model->getBooleanValue(rv)) { |
|||
std::cout << i << " " << std::endl; |
|||
observations.set(pomdp.getObservation(i)); |
|||
} else { |
|||
remainingstates.set(i); |
|||
} |
|||
//std::cout << i << ": " << model->getBooleanValue(rv) << ", ";
|
|||
++i; |
|||
} |
|||
|
|||
scheduler.clear(); |
|||
|
|||
std::vector<storm::expressions::Expression> schedulerSoFar; |
|||
uint64_t obs = 0; |
|||
for (auto const &actionSelectionVarsForObs : actionSelectionVars) { |
|||
uint64_t act = 0; |
|||
scheduler.push_back(std::set<uint64_t>()); |
|||
for (auto const &asv : actionSelectionVarsForObs) { |
|||
if (model->getBooleanValue(asv)) { |
|||
scheduler.back().insert(act); |
|||
schedulerSoFar.push_back(actionSelectionVarExpressions[obs][act]); |
|||
} |
|||
act++; |
|||
} |
|||
obs++; |
|||
} |
|||
|
|||
std::cout << "the scheduler: " << std::endl; |
|||
for (uint64_t obs = 0; obs < scheduler.size(); ++obs) { |
|||
if (observations.get(obs)) { |
|||
std::cout << "observation: " << obs << std::endl; |
|||
std::cout << "actions:"; |
|||
for (auto act : scheduler[obs]) { |
|||
std::cout << " " << act; |
|||
} |
|||
std::cout << std::endl; |
|||
} |
|||
} |
|||
|
|||
|
|||
std::vector<storm::expressions::Expression> remainingExpressions; |
|||
for (auto index : remainingstates) { |
|||
remainingExpressions.push_back(reachVarExpressions[index]); |
|||
} |
|||
|
|||
smtSolver->push(); |
|||
// Add scheduler
|
|||
smtSolver->add(storm::expressions::conjunction(schedulerSoFar)); |
|||
smtSolver->add(storm::expressions::disjunction(remainingExpressions)); |
|||
|
|||
} |
|||
|
|||
} |
|||
|
|||
|
|||
template class MemlessStrategySearchQualitative<double>; |
|||
template class MemlessStrategySearchQualitative<storm::RationalNumber>; |
|||
} |
|||
} |
|||
@ -0,0 +1,75 @@ |
|||
#include <vector> |
|||
#include "storm/storage/expressions/Expressions.h" |
|||
#include "storm/solver/SmtSolver.h" |
|||
#include "storm/models/sparse/Pomdp.h" |
|||
#include "storm/utility/solver.h" |
|||
#include "storm/exceptions/UnexpectedException.h" |
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
|
|||
template<typename ValueType> |
|||
class MemlessStrategySearchQualitative { |
|||
// Implements an extension to the Chatterjee, Chmelik, Davies (AAAI-16) paper. |
|||
|
|||
|
|||
public: |
|||
MemlessStrategySearchQualitative(storm::models::sparse::Pomdp<ValueType> const& pomdp, |
|||
std::set<uint32_t> const& targetObservationSet, |
|||
storm::storage::BitVector const& targetStates, |
|||
storm::storage::BitVector const& surelyReachSinkStates, |
|||
std::shared_ptr<storm::utility::solver::SmtSolverFactory>& smtSolverFactory) : |
|||
pomdp(pomdp), |
|||
targetObservations(targetObservationSet), |
|||
targetStates(targetStates), |
|||
surelyReachSinkStates(surelyReachSinkStates) { |
|||
this->expressionManager = std::make_shared<storm::expressions::ExpressionManager>(); |
|||
smtSolver = smtSolverFactory->create(*expressionManager); |
|||
|
|||
} |
|||
|
|||
void setSurelyReachSinkStates(storm::storage::BitVector const& surelyReachSink) { |
|||
surelyReachSinkStates = surelyReachSink; |
|||
} |
|||
|
|||
void analyzeForInitialStates(uint64_t k) { |
|||
analyze(k, pomdp.getInitialStates(), pomdp.getInitialStates()); |
|||
} |
|||
|
|||
void findNewStrategyForSomeState(uint64_t k) { |
|||
std::cout << surelyReachSinkStates << std::endl; |
|||
std::cout << targetStates << std::endl; |
|||
std::cout << (~surelyReachSinkStates & ~targetStates) << std::endl; |
|||
analyze(k, ~surelyReachSinkStates & ~targetStates); |
|||
|
|||
|
|||
} |
|||
|
|||
bool analyze(uint64_t k, storm::storage::BitVector const& oneOfTheseStates, storm::storage::BitVector const& allOfTheseStates = storm::storage::BitVector()); |
|||
|
|||
|
|||
private: |
|||
void initialize(uint64_t k); |
|||
|
|||
|
|||
std::unique_ptr<storm::solver::SmtSolver> smtSolver; |
|||
storm::models::sparse::Pomdp<ValueType> const& pomdp; |
|||
std::shared_ptr<storm::expressions::ExpressionManager> expressionManager; |
|||
uint64_t maxK = std::numeric_limits<uint64_t>::max(); |
|||
|
|||
std::set<uint32_t> targetObservations; |
|||
storm::storage::BitVector targetStates; |
|||
storm::storage::BitVector surelyReachSinkStates; |
|||
|
|||
std::vector<std::vector<uint64_t>> statesPerObservation; |
|||
std::vector<std::vector<storm::expressions::Expression>> actionSelectionVarExpressions; // A_{z,a} |
|||
std::vector<std::vector<storm::expressions::Variable>> actionSelectionVars; |
|||
std::vector<storm::expressions::Variable> reachVars; |
|||
std::vector<storm::expressions::Expression> reachVarExpressions; |
|||
std::vector<std::vector<storm::expressions::Expression>> pathVars; |
|||
|
|||
|
|||
|
|||
}; |
|||
} |
|||
} |
|||
@ -0,0 +1,186 @@ |
|||
|
|||
|
|||
#include "storm-pomdp/analysis/QualitativeStrategySearchNaive.h"
|
|||
|
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
|
|||
template <typename ValueType> |
|||
void QualitativeStrategySearchNaive<ValueType>::initialize(uint64_t k) { |
|||
if (maxK == std::numeric_limits<uint64_t>::max()) { |
|||
// not initialized at all.
|
|||
// Create some data structures.
|
|||
for(uint64_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { |
|||
actionSelectionVars.push_back(std::vector<storm::expressions::Variable>()); |
|||
actionSelectionVarExpressions.push_back(std::vector<storm::expressions::Expression>()); |
|||
statesPerObservation.push_back(std::vector<uint64_t>()); // Consider using bitvectors instead.
|
|||
} |
|||
|
|||
// Fill the states-per-observation mapping,
|
|||
// declare the reachability variables,
|
|||
// declare the path variables.
|
|||
uint64_t stateId = 0; |
|||
for(auto obs : pomdp.getObservations()) { |
|||
pathVars.push_back(std::vector<storm::expressions::Expression>()); |
|||
for (uint64_t i = 0; i < k; ++i) { |
|||
pathVars.back().push_back(expressionManager->declareBooleanVariable("P-"+std::to_string(stateId)+"-"+std::to_string(i)).getExpression()); |
|||
} |
|||
reachVars.push_back(expressionManager->declareBooleanVariable("C-" + std::to_string(stateId))); |
|||
reachVarExpressions.push_back(reachVars.back().getExpression()); |
|||
statesPerObservation.at(obs).push_back(stateId++); |
|||
} |
|||
assert(pathVars.size() == pomdp.getNumberOfStates()); |
|||
|
|||
// Create the action selection variables.
|
|||
uint64_t obs = 0; |
|||
for(auto const& statesForObservation : statesPerObservation) { |
|||
for (uint64_t a = 0; a < pomdp.getNumberOfChoices(statesForObservation.front()); ++a) { |
|||
std::string varName = "A-" + std::to_string(obs) + "-" + std::to_string(a); |
|||
actionSelectionVars.at(obs).push_back(expressionManager->declareBooleanVariable(varName)); |
|||
actionSelectionVarExpressions.at(obs).push_back(actionSelectionVars.at(obs).back().getExpression()); |
|||
} |
|||
++obs; |
|||
} |
|||
} else { |
|||
assert(false); |
|||
} |
|||
|
|||
uint64_t rowindex = 0; |
|||
for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { |
|||
if (targetStates.get(state)) { |
|||
smtSolver->add(pathVars[state][0]); |
|||
} else { |
|||
smtSolver->add(!pathVars[state][0]); |
|||
} |
|||
|
|||
if (surelyReachSinkStates.get(state)) { |
|||
smtSolver->add(!reachVarExpressions[state]); |
|||
} else if(!targetStates.get(state)) { |
|||
std::vector<std::vector<std::vector<storm::expressions::Expression>>> pathsubsubexprs; |
|||
for (uint64_t j = 1; j < k; ++j) { |
|||
pathsubsubexprs.push_back(std::vector<std::vector<storm::expressions::Expression>>()); |
|||
for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { |
|||
pathsubsubexprs.back().push_back(std::vector<storm::expressions::Expression>()); |
|||
} |
|||
} |
|||
|
|||
for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { |
|||
std::vector<storm::expressions::Expression> subexprreach; |
|||
|
|||
subexprreach.push_back(!reachVarExpressions.at(state)); |
|||
subexprreach.push_back(!actionSelectionVarExpressions.at(pomdp.getObservation(state)).at(action)); |
|||
for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { |
|||
subexprreach.push_back(reachVarExpressions.at(entries.getColumn())); |
|||
} |
|||
smtSolver->add(storm::expressions::disjunction(subexprreach)); |
|||
for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { |
|||
for (uint64_t j = 1; j < k; ++j) { |
|||
pathsubsubexprs[j - 1][action].push_back(pathVars[entries.getColumn()][j - 1]); |
|||
} |
|||
} |
|||
rowindex++; |
|||
} |
|||
smtSolver->add(storm::expressions::implies(reachVarExpressions.at(state), pathVars.at(state).back())); |
|||
|
|||
for (uint64_t j = 1; j < k; ++j) { |
|||
std::vector<storm::expressions::Expression> pathsubexprs; |
|||
|
|||
for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { |
|||
pathsubexprs.push_back(actionSelectionVarExpressions.at(pomdp.getObservation(state)).at(action) && storm::expressions::disjunction(pathsubsubexprs[j - 1][action])); |
|||
} |
|||
smtSolver->add(storm::expressions::iff(pathVars[state][j], storm::expressions::disjunction(pathsubexprs))); |
|||
} |
|||
} |
|||
} |
|||
|
|||
for (auto const& actionVars : actionSelectionVarExpressions) { |
|||
smtSolver->add(storm::expressions::disjunction(actionVars)); |
|||
} |
|||
} |
|||
|
|||
template <typename ValueType> |
|||
bool QualitativeStrategySearchNaive<ValueType>::analyze(uint64_t k, storm::storage::BitVector const& oneOfTheseStates, storm::storage::BitVector const& allOfTheseStates) { |
|||
if (k < maxK) { |
|||
initialize(k); |
|||
} |
|||
|
|||
std::vector<storm::expressions::Expression> atLeastOneOfStates; |
|||
|
|||
for(uint64_t state : oneOfTheseStates) { |
|||
atLeastOneOfStates.push_back(reachVarExpressions[state]); |
|||
} |
|||
assert(atLeastOneOfStates.size() > 0); |
|||
smtSolver->add(storm::expressions::disjunction(atLeastOneOfStates)); |
|||
|
|||
for(uint64_t state : allOfTheseStates) { |
|||
smtSolver->add(reachVarExpressions[state]); |
|||
} |
|||
|
|||
|
|||
|
|||
std::cout << smtSolver->getSmtLibString() << std::endl; |
|||
|
|||
auto result = smtSolver->check(); |
|||
uint64_t i = 0; |
|||
smtSolver->push(); |
|||
|
|||
|
|||
|
|||
if (result == storm::solver::SmtSolver::CheckResult::Unknown) { |
|||
STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "SMT solver yielded an unexpected result"); |
|||
} else if(result == storm::solver::SmtSolver::CheckResult::Unsat) { |
|||
std::cout << std::endl << "Unsatisfiable!" << std::endl; |
|||
return false; |
|||
} else { |
|||
|
|||
std::cout << std::endl << "Satisfying assignment: " << std::endl << smtSolver->getModelAsValuation().toString(true) << std::endl; |
|||
auto model = smtSolver->getModel(); |
|||
std::cout << "states that are okay" << std::endl; |
|||
storm::storage::BitVector observations(pomdp.getNrObservations()); |
|||
storm::storage::BitVector remainingstates(pomdp.getNumberOfStates()); |
|||
for (auto rv : reachVars) { |
|||
if (model->getBooleanValue(rv)) { |
|||
std::cout << i << " " << std::endl; |
|||
observations.set(pomdp.getObservation(i)); |
|||
} else { |
|||
remainingstates.set(i); |
|||
} |
|||
//std::cout << i << ": " << model->getBooleanValue(rv) << ", ";
|
|||
++i; |
|||
} |
|||
std::vector <std::set<uint64_t>> scheduler; |
|||
for (auto const &actionSelectionVarsForObs : actionSelectionVars) { |
|||
uint64_t act = 0; |
|||
scheduler.push_back(std::set<uint64_t>()); |
|||
for (auto const &asv : actionSelectionVarsForObs) { |
|||
if (model->getBooleanValue(asv)) { |
|||
scheduler.back().insert(act); |
|||
} |
|||
act++; |
|||
} |
|||
} |
|||
std::cout << "the scheduler: " << std::endl; |
|||
for (uint64_t obs = 0; obs < scheduler.size(); ++obs) { |
|||
if (observations.get(obs)) { |
|||
std::cout << "observation: " << obs << std::endl; |
|||
std::cout << "actions:"; |
|||
for (auto act : scheduler[obs]) { |
|||
std::cout << " " << act; |
|||
} |
|||
std::cout << std::endl; |
|||
} |
|||
} |
|||
|
|||
|
|||
return true; |
|||
} |
|||
|
|||
|
|||
|
|||
} |
|||
|
|||
template class QualitativeStrategySearchNaive<double>; |
|||
template class QualitativeStrategySearchNaive<storm::RationalNumber>; |
|||
} |
|||
} |
|||
@ -0,0 +1,74 @@ |
|||
#include <vector> |
|||
#include "storm/storage/expressions/Expressions.h" |
|||
#include "storm/solver/SmtSolver.h" |
|||
#include "storm/models/sparse/Pomdp.h" |
|||
#include "storm/utility/solver.h" |
|||
#include "storm/exceptions/UnexpectedException.h" |
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
|
|||
template<typename ValueType> |
|||
class QualitativeStrategySearchNaive { |
|||
// Implements an extension to the Chatterjee, Chmelik, Davies (AAAI-16) paper. |
|||
|
|||
|
|||
public: |
|||
QualitativeStrategySearchNaive(storm::models::sparse::Pomdp<ValueType> const& pomdp, |
|||
std::set<uint32_t> const& targetObservationSet, |
|||
storm::storage::BitVector const& targetStates, |
|||
storm::storage::BitVector const& surelyReachSinkStates, |
|||
std::shared_ptr<storm::utility::solver::SmtSolverFactory>& smtSolverFactory) : |
|||
pomdp(pomdp), |
|||
targetObservations(targetObservationSet), |
|||
targetStates(targetStates), |
|||
surelyReachSinkStates(surelyReachSinkStates) { |
|||
this->expressionManager = std::make_shared<storm::expressions::ExpressionManager>(); |
|||
smtSolver = smtSolverFactory->create(*expressionManager); |
|||
|
|||
} |
|||
|
|||
void setSurelyReachSinkStates(storm::storage::BitVector const& surelyReachSink) { |
|||
surelyReachSinkStates = surelyReachSink; |
|||
} |
|||
|
|||
void analyzeForInitialStates(uint64_t k) { |
|||
analyze(k, pomdp.getInitialStates(), pomdp.getInitialStates()); |
|||
} |
|||
|
|||
void findNewStrategyForSomeState(uint64_t k) { |
|||
std::cout << surelyReachSinkStates << std::endl; |
|||
std::cout << targetStates << std::endl; |
|||
std::cout << (~surelyReachSinkStates & ~targetStates) << std::endl; |
|||
analyze(k, ~surelyReachSinkStates & ~targetStates); |
|||
|
|||
|
|||
} |
|||
|
|||
bool analyze(uint64_t k, storm::storage::BitVector const& oneOfTheseStates, storm::storage::BitVector const& allOfTheseStates = storm::storage::BitVector()); |
|||
|
|||
private: |
|||
void initialize(uint64_t k); |
|||
|
|||
|
|||
std::unique_ptr<storm::solver::SmtSolver> smtSolver; |
|||
storm::models::sparse::Pomdp<ValueType> const& pomdp; |
|||
std::shared_ptr<storm::expressions::ExpressionManager> expressionManager; |
|||
uint64_t maxK = std::numeric_limits<uint64_t>::max(); |
|||
|
|||
std::set<uint32_t> targetObservations; |
|||
storm::storage::BitVector targetStates; |
|||
storm::storage::BitVector surelyReachSinkStates; |
|||
|
|||
std::vector<std::vector<uint64_t>> statesPerObservation; |
|||
std::vector<std::vector<storm::expressions::Expression>> actionSelectionVarExpressions; // A_{z,a} |
|||
std::vector<std::vector<storm::expressions::Variable>> actionSelectionVars; |
|||
std::vector<storm::expressions::Variable> reachVars; |
|||
std::vector<storm::expressions::Expression> reachVarExpressions; |
|||
std::vector<std::vector<storm::expressions::Expression>> pathVars; |
|||
|
|||
|
|||
|
|||
}; |
|||
} |
|||
} |
|||
@ -0,0 +1,840 @@ |
|||
#pragma once |
|||
|
|||
#include <memory> |
|||
#include <vector> |
|||
#include <deque> |
|||
#include <map> |
|||
#include <boost/optional.hpp> |
|||
|
|||
#include "storm-parsers/api/properties.h" |
|||
#include "storm/api/properties.h" |
|||
#include "storm/api/verification.h" |
|||
|
|||
#include "storm/storage/BitVector.h" |
|||
#include "storm/storage/SparseMatrix.h" |
|||
#include "storm/utility/macros.h" |
|||
#include "storm-pomdp/storage/BeliefManager.h" |
|||
#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h" |
|||
#include "storm/utility/SignalHandler.h" |
|||
#include "storm/modelchecker/results/CheckResult.h" |
|||
#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" |
|||
#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" |
|||
#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" |
|||
|
|||
|
|||
namespace storm { |
|||
namespace builder { |
|||
template<typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType> |
|||
class BeliefMdpExplorer { |
|||
public: |
|||
typedef typename PomdpType::ValueType ValueType; |
|||
typedef storm::storage::BeliefManager<PomdpType, BeliefValueType> BeliefManagerType; |
|||
typedef typename BeliefManagerType::BeliefId BeliefId; |
|||
typedef uint64_t MdpStateType; |
|||
|
|||
enum class Status { |
|||
Uninitialized, |
|||
Exploring, |
|||
ModelFinished, |
|||
ModelChecked |
|||
}; |
|||
|
|||
BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds) : beliefManager(beliefManager), pomdpValueBounds(pomdpValueBounds), status(Status::Uninitialized) { |
|||
// Intentionally left empty |
|||
} |
|||
BeliefMdpExplorer(BeliefMdpExplorer&& other) = default; |
|||
|
|||
BeliefManagerType const& getBeliefManager() const { |
|||
return *beliefManager; |
|||
} |
|||
|
|||
void startNewExploration(boost::optional<ValueType> extraTargetStateValue = boost::none, boost::optional<ValueType> extraBottomStateValue = boost::none) { |
|||
status = Status::Exploring; |
|||
// Reset data from potential previous explorations |
|||
mdpStateToBeliefIdMap.clear(); |
|||
beliefIdToMdpStateMap.clear(); |
|||
exploredBeliefIds.clear(); |
|||
exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); |
|||
mdpStatesToExplore.clear(); |
|||
lowerValueBounds.clear(); |
|||
upperValueBounds.clear(); |
|||
values.clear(); |
|||
exploredMdpTransitions.clear(); |
|||
exploredChoiceIndices.clear(); |
|||
mdpActionRewards.clear(); |
|||
targetStates.clear(); |
|||
truncatedStates.clear(); |
|||
delayedExplorationChoices.clear(); |
|||
optimalChoices = boost::none; |
|||
optimalChoicesReachableMdpStates = boost::none; |
|||
exploredMdp = nullptr; |
|||
internalAddRowGroupIndex(); // Mark the start of the first row group |
|||
|
|||
// Add some states with special treatment (if requested) |
|||
if (extraBottomStateValue) { |
|||
currentMdpState = getCurrentNumberOfMdpStates(); |
|||
extraBottomState = currentMdpState; |
|||
mdpStateToBeliefIdMap.push_back(beliefManager->noId()); |
|||
insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get()); |
|||
|
|||
internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one<ValueType>()); |
|||
internalAddRowGroupIndex(); |
|||
} else { |
|||
extraBottomState = boost::none; |
|||
} |
|||
if (extraTargetStateValue) { |
|||
currentMdpState = getCurrentNumberOfMdpStates(); |
|||
extraTargetState = currentMdpState; |
|||
mdpStateToBeliefIdMap.push_back(beliefManager->noId()); |
|||
insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get()); |
|||
|
|||
internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one<ValueType>()); |
|||
internalAddRowGroupIndex(); |
|||
|
|||
targetStates.grow(getCurrentNumberOfMdpStates(), false); |
|||
targetStates.set(extraTargetState.get(), true); |
|||
} else { |
|||
extraTargetState = boost::none; |
|||
} |
|||
currentMdpState = noState(); |
|||
|
|||
// Set up the initial state. |
|||
initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); |
|||
} |
|||
|
|||
/*! |
|||
* Restarts the exploration to allow re-exploring each state. |
|||
* After calling this, the "currently explored" MDP has the same number of states and choices as the "old" one, but the choices are still empty |
|||
* This method inserts the initial state of the MDP in the exploration queue. |
|||
* While re-exploring, the reference to the old MDP remains valid. |
|||
*/ |
|||
void restartExploration() { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status."); |
|||
status = Status::Exploring; |
|||
// We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid. |
|||
exploredBeliefIds.clear(); |
|||
exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); |
|||
exploredMdpTransitions.clear(); |
|||
exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices()); |
|||
exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices(); |
|||
mdpActionRewards.clear(); |
|||
if (exploredMdp->hasRewardModel()) { |
|||
// Can be overwritten during exploration |
|||
mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector(); |
|||
} |
|||
targetStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); |
|||
truncatedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); |
|||
delayedExplorationChoices.clear(); |
|||
mdpStatesToExplore.clear(); |
|||
|
|||
// The extra states are not changed |
|||
if (extraBottomState) { |
|||
currentMdpState = extraBottomState.get(); |
|||
restoreOldBehaviorAtCurrentState(0); |
|||
} |
|||
if (extraTargetState) { |
|||
currentMdpState = extraTargetState.get(); |
|||
restoreOldBehaviorAtCurrentState(0); |
|||
targetStates.set(extraTargetState.get(), true); |
|||
} |
|||
currentMdpState = noState(); |
|||
|
|||
// Set up the initial state. |
|||
initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); |
|||
} |
|||
|
|||
bool hasUnexploredState() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return !mdpStatesToExplore.empty(); |
|||
} |
|||
|
|||
BeliefId exploreNextState() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
// Mark the end of the previously explored row group. |
|||
if (currentMdpState != noState() && !currentStateHasOldBehavior()) { |
|||
internalAddRowGroupIndex(); |
|||
} |
|||
|
|||
// Pop from the queue. |
|||
currentMdpState = mdpStatesToExplore.front(); |
|||
mdpStatesToExplore.pop_front(); |
|||
|
|||
|
|||
return mdpStateToBeliefIdMap[currentMdpState]; |
|||
} |
|||
|
|||
void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero<ValueType>(), ValueType const& bottomStateValue = storm::utility::zero<ValueType>()) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); |
|||
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
|||
if (!storm::utility::isZero(bottomStateValue)) { |
|||
STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); |
|||
internalAddTransition(row, extraBottomState.get(), bottomStateValue); |
|||
} |
|||
if (!storm::utility::isZero(targetStateValue)) { |
|||
STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none."); |
|||
internalAddTransition(row, extraTargetState.get(), targetStateValue); |
|||
} |
|||
} |
|||
|
|||
void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one<ValueType>()) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); |
|||
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
|||
internalAddTransition(row, getCurrentMdpState(), value); |
|||
} |
|||
|
|||
/*! |
|||
* Adds the next transition to the given successor belief |
|||
* @param localActionIndex |
|||
* @param transitionTarget |
|||
* @param value |
|||
* @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition. |
|||
* @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true. |
|||
*/ |
|||
bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); |
|||
|
|||
MdpStateType column; |
|||
if (ignoreNewBeliefs) { |
|||
column = getExploredMdpState(transitionTarget); |
|||
if (column == noState()) { |
|||
return false; |
|||
} |
|||
} else { |
|||
column = getOrAddMdpState(transitionTarget); |
|||
} |
|||
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
|||
internalAddTransition(row, column, value); |
|||
return true; |
|||
} |
|||
|
|||
void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero<ValueType>()) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) { |
|||
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
|||
} |
|||
uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; |
|||
mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward; |
|||
} |
|||
|
|||
void setCurrentStateIsTarget() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
targetStates.grow(getCurrentNumberOfMdpStates(), false); |
|||
targetStates.set(getCurrentMdpState(), true); |
|||
} |
|||
|
|||
void setCurrentStateIsTruncated() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
truncatedStates.grow(getCurrentNumberOfMdpStates(), false); |
|||
truncatedStates.set(getCurrentMdpState(), true); |
|||
} |
|||
|
|||
void setCurrentChoiceIsDelayed(uint64_t const& localActionIndex) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
delayedExplorationChoices.grow(getCurrentNumberOfMdpChoices(), false); |
|||
delayedExplorationChoices.set(getStartOfCurrentRowGroup() + localActionIndex, true); |
|||
} |
|||
|
|||
bool currentStateHasOldBehavior() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateHasOldBehavior' called but there is no current state."); |
|||
return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); |
|||
} |
|||
|
|||
bool getCurrentStateWasTruncated() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); |
|||
STORM_LOG_ASSERT(exploredMdp, "No 'old' mdp available"); |
|||
return exploredMdp->getStateLabeling().getStateHasLabel("truncated", getCurrentMdpState()); |
|||
} |
|||
|
|||
/*! |
|||
* Retrieves whether the current state can be reached under an optimal scheduler |
|||
* This requires a previous call of computeOptimalChoicesAndReachableMdpStates. |
|||
*/ |
|||
bool stateIsOptimalSchedulerReachable(MdpStateType mdpState) const { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), "Method 'stateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); |
|||
return optimalChoicesReachableMdpStates->get(mdpState); |
|||
} |
|||
|
|||
/*! |
|||
* Retrieves whether the given action at the current state was optimal in the most recent check. |
|||
* This requires a previous call of computeOptimalChoicesAndReachableMdpStates. |
|||
*/ |
|||
bool actionIsOptimal(uint64_t const& globalActionIndex) const { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(optimalChoices.is_initialized(), "Method 'actionIsOptimal' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); |
|||
return optimalChoices->get(globalActionIndex); |
|||
} |
|||
|
|||
/*! |
|||
* Retrieves whether the current state can be reached under a scheduler that was optimal in the most recent check. |
|||
* This requires (i) a previous call of computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior. |
|||
*/ |
|||
bool currentStateIsOptimalSchedulerReachable() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateIsOptimalSchedulerReachable' called but there is no current state."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'currentStateIsOptimalSchedulerReachable' called but current state has no old behavior"); |
|||
STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); |
|||
return optimalChoicesReachableMdpStates->get(getCurrentMdpState()); |
|||
} |
|||
|
|||
/*! |
|||
* Retrieves whether the given action at the current state was optimal in the most recent check. |
|||
* This requires (i) a previous call of computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior. |
|||
*/ |
|||
bool actionAtCurrentStateWasOptimal(uint64_t const& localActionIndex) const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); |
|||
STORM_LOG_ASSERT(optimalChoices.is_initialized(), "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); |
|||
uint64_t choice = getStartOfCurrentRowGroup() + localActionIndex; |
|||
return optimalChoices->get(choice); |
|||
} |
|||
|
|||
bool getCurrentStateActionExplorationWasDelayed(uint64_t const& localActionIndex) const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); |
|||
STORM_LOG_ASSERT(exploredMdp, "No 'old' mdp available"); |
|||
uint64_t choice = exploredMdp->getNondeterministicChoiceIndices()[getCurrentMdpState()] + localActionIndex; |
|||
return exploredMdp->hasChoiceLabeling() && exploredMdp->getChoiceLabeling().getChoiceHasLabel("delayed", choice); |
|||
} |
|||
|
|||
/*! |
|||
* Inserts transitions and rewards at the given action as in the MDP of the previous exploration. |
|||
* Does NOT set whether the state is truncated and/or target. |
|||
* Will add "old" states that have not been considered before into the exploration queue |
|||
* @param localActionIndex |
|||
*/ |
|||
void restoreOldBehaviorAtCurrentState(uint64_t const& localActionIndex) { |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any."); |
|||
STORM_LOG_ASSERT(localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); |
|||
|
|||
uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex; |
|||
STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index."); |
|||
|
|||
// Insert the transitions |
|||
for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) { |
|||
internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue()); |
|||
// Check whether exploration is needed |
|||
auto beliefId = getBeliefId(transition.getColumn()); |
|||
if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state |
|||
if (!exploredBeliefIds.get(beliefId)) { |
|||
// This belief needs exploration |
|||
exploredBeliefIds.set(beliefId, true); |
|||
mdpStatesToExplore.push_back(transition.getColumn()); |
|||
} |
|||
} |
|||
} |
|||
|
|||
// Actually, nothing needs to be done for rewards since we already initialize the vector with the "old" values |
|||
} |
|||
|
|||
void finishExploration() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); |
|||
|
|||
// Complete the exploration |
|||
// Finish the last row grouping in case the last explored state was new |
|||
if (!currentStateHasOldBehavior()) { |
|||
internalAddRowGroupIndex(); |
|||
} |
|||
// Resize state- and choice based vectors to the correct size |
|||
targetStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
truncatedStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
if (!mdpActionRewards.empty()) { |
|||
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
|||
} |
|||
|
|||
// We are not exploring anymore |
|||
currentMdpState = noState(); |
|||
|
|||
// If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build). |
|||
// We get rid of these before rebuilding the model |
|||
if (exploredMdp) { |
|||
dropUnexploredStates(); |
|||
} |
|||
|
|||
// The potentially computed optimal choices and the set of states that are reachable under these choices are not valid anymore. |
|||
optimalChoices = boost::none; |
|||
optimalChoicesReachableMdpStates = boost::none; |
|||
|
|||
// Create the tranistion matrix |
|||
uint64_t entryCount = 0; |
|||
for (auto const& row : exploredMdpTransitions) { |
|||
entryCount += row.size(); |
|||
} |
|||
storm::storage::SparseMatrixBuilder<ValueType> builder(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), entryCount, true, true, getCurrentNumberOfMdpStates()); |
|||
for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { |
|||
uint64_t rowIndex = exploredChoiceIndices[groupIndex]; |
|||
uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; |
|||
builder.newRowGroup(rowIndex); |
|||
for (; rowIndex < groupEnd; ++rowIndex) { |
|||
for (auto const& entry : exploredMdpTransitions[rowIndex]) { |
|||
builder.addNextValue(rowIndex, entry.first, entry.second); |
|||
} |
|||
} |
|||
} |
|||
auto mdpTransitionMatrix = builder.build(); |
|||
|
|||
// Create a standard labeling |
|||
storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates()); |
|||
mdpLabeling.addLabel("init"); |
|||
mdpLabeling.addLabelToState("init", initialMdpState); |
|||
targetStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
mdpLabeling.addLabel("target", std::move(targetStates)); |
|||
truncatedStates.resize(getCurrentNumberOfMdpStates(), false); |
|||
mdpLabeling.addLabel("truncated", std::move(truncatedStates)); |
|||
|
|||
// Create a standard reward model (if rewards are available) |
|||
std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels; |
|||
if (!mdpActionRewards.empty()) { |
|||
mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>()); |
|||
mdpRewardModels.emplace("default", |
|||
storm::models::sparse::StandardRewardModel<ValueType>(boost::optional<std::vector<ValueType>>(), std::move(mdpActionRewards))); |
|||
} |
|||
|
|||
// Create model components |
|||
storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); |
|||
|
|||
// Potentially create a choice labeling |
|||
if (!delayedExplorationChoices.empty()) { |
|||
modelComponents.choiceLabeling = storm::models::sparse::ChoiceLabeling(getCurrentNumberOfMdpChoices()); |
|||
delayedExplorationChoices.resize(getCurrentNumberOfMdpChoices(), false); |
|||
modelComponents.choiceLabeling->addLabel("delayed", std::move(delayedExplorationChoices)); |
|||
} |
|||
|
|||
// Create the final model. |
|||
exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents)); |
|||
status = Status::ModelFinished; |
|||
STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << " of which were flagged as truncated)."); |
|||
|
|||
} |
|||
|
|||
void dropUnexploredStates() { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); |
|||
|
|||
STORM_LOG_ASSERT(exploredMdp, "Method called although no 'old' MDP is available."); |
|||
// Find the states (and corresponding choices) that were not explored. |
|||
// These correspond to "empty" MDP transitions |
|||
storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(), true), relevantMdpChoices(getCurrentNumberOfMdpChoices(), true); |
|||
std::vector<MdpStateType> toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState()); |
|||
MdpStateType nextRelevantIndex = 0; |
|||
for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { |
|||
uint64_t rowIndex = exploredChoiceIndices[groupIndex]; |
|||
// Check first row in group |
|||
if (exploredMdpTransitions[rowIndex].empty()) { |
|||
relevantMdpChoices.set(rowIndex, false); |
|||
relevantMdpStates.set(groupIndex, false); |
|||
} else { |
|||
toRelevantStateIndexMap[groupIndex] = nextRelevantIndex; |
|||
++nextRelevantIndex; |
|||
} |
|||
uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; |
|||
// process remaining rows in group |
|||
for (++rowIndex; rowIndex < groupEnd; ++rowIndex) { |
|||
// Assert that all actions at the current state were consistently explored or unexplored. |
|||
STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex), "Actions at 'old' MDP state " << groupIndex << " were only partly explored."); |
|||
if (exploredMdpTransitions[rowIndex].empty()) { |
|||
relevantMdpChoices.set(rowIndex, false); |
|||
} |
|||
} |
|||
} |
|||
|
|||
if (relevantMdpStates.full()) { |
|||
// All states are relevant so nothing to do |
|||
return; |
|||
} |
|||
|
|||
// Translate various components to the "new" MDP state set |
|||
storm::utility::vector::filterVectorInPlace(mdpStateToBeliefIdMap, relevantMdpStates); |
|||
{ // beliefIdToMdpStateMap |
|||
for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) { |
|||
if (relevantMdpStates.get(belIdToMdpStateIt->second)) { |
|||
// Translate current entry and move on to the next one. |
|||
belIdToMdpStateIt->second = toRelevantStateIndexMap[belIdToMdpStateIt->second]; |
|||
++belIdToMdpStateIt; |
|||
} else { |
|||
STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first), "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId"); |
|||
// Delete current entry and move on to the next one. |
|||
// This works because std::map::erase does not invalidate other iterators within the map! |
|||
beliefIdToMdpStateMap.erase(belIdToMdpStateIt++); |
|||
} |
|||
} |
|||
} |
|||
{ // exploredMdpTransitions |
|||
storm::utility::vector::filterVectorInPlace(exploredMdpTransitions, relevantMdpChoices); |
|||
// Adjust column indices. Unfortunately, the fastest way seems to be to "rebuild" the map |
|||
// It might payoff to do this when building the matrix. |
|||
for (auto& transitions : exploredMdpTransitions) { |
|||
std::map<MdpStateType, ValueType> newTransitions; |
|||
for (auto const& entry : transitions) { |
|||
STORM_LOG_ASSERT(relevantMdpStates.get(entry.first), "Relevant state has transition to irrelevant state."); |
|||
newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second); |
|||
} |
|||
transitions = std::move(newTransitions); |
|||
} |
|||
} |
|||
{ // exploredChoiceIndices |
|||
MdpStateType newState = 0; |
|||
assert(exploredChoiceIndices[0] == 0u); |
|||
// Loop invariant: all indices up to exploredChoiceIndices[newState] consider the new row indices and all other entries are not touched. |
|||
for (auto const& oldState : relevantMdpStates) { |
|||
if (oldState != newState) { |
|||
assert(oldState > newState); |
|||
uint64_t groupSize = exploredChoiceIndices[oldState + 1] - exploredChoiceIndices[oldState]; |
|||
exploredChoiceIndices[newState + 1] = exploredChoiceIndices[newState] + groupSize; |
|||
} |
|||
++newState; |
|||
} |
|||
exploredChoiceIndices.resize(newState + 1); |
|||
} |
|||
if (!mdpActionRewards.empty()) { |
|||
storm::utility::vector::filterVectorInPlace(mdpActionRewards, relevantMdpChoices); |
|||
} |
|||
if (extraBottomState) { |
|||
extraBottomState = toRelevantStateIndexMap[extraBottomState.get()]; |
|||
} |
|||
if (extraTargetState) { |
|||
extraTargetState = toRelevantStateIndexMap[extraTargetState.get()]; |
|||
} |
|||
targetStates = targetStates % relevantMdpStates; |
|||
truncatedStates = truncatedStates % relevantMdpStates; |
|||
initialMdpState = toRelevantStateIndexMap[initialMdpState]; |
|||
|
|||
storm::utility::vector::filterVectorInPlace(lowerValueBounds, relevantMdpStates); |
|||
storm::utility::vector::filterVectorInPlace(upperValueBounds, relevantMdpStates); |
|||
storm::utility::vector::filterVectorInPlace(values, relevantMdpStates); |
|||
|
|||
} |
|||
|
|||
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const { |
|||
STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); |
|||
return exploredMdp; |
|||
} |
|||
|
|||
MdpStateType getCurrentNumberOfMdpStates() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return mdpStateToBeliefIdMap.size(); |
|||
} |
|||
|
|||
MdpStateType getCurrentNumberOfMdpChoices() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return exploredMdpTransitions.size(); |
|||
} |
|||
|
|||
MdpStateType getStartOfCurrentRowGroup() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return exploredChoiceIndices[getCurrentMdpState()]; |
|||
} |
|||
|
|||
ValueType getLowerValueBoundAtCurrentState() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return lowerValueBounds[getCurrentMdpState()]; |
|||
} |
|||
|
|||
ValueType getUpperValueBoundAtCurrentState() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return upperValueBounds[getCurrentMdpState()]; |
|||
} |
|||
|
|||
ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const { |
|||
STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(), "Requested lower value bounds but none were available."); |
|||
auto it = pomdpValueBounds.lower.begin(); |
|||
ValueType result = beliefManager->getWeightedSum(beliefId, *it); |
|||
for (++it; it != pomdpValueBounds.lower.end(); ++it) { |
|||
result = std::max(result, beliefManager->getWeightedSum(beliefId, *it)); |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const { |
|||
STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(), "Requested upper value bounds but none were available."); |
|||
auto it = pomdpValueBounds.upper.begin(); |
|||
ValueType result = beliefManager->getWeightedSum(beliefId, *it); |
|||
for (++it; it != pomdpValueBounds.upper.end(); ++it) { |
|||
result = std::min(result, beliefManager->getWeightedSum(beliefId, *it)); |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
void computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) { |
|||
STORM_LOG_ASSERT(status == Status::ModelFinished, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored"); |
|||
auto property = createStandardProperty(dir, exploredMdp->hasRewardModel()); |
|||
auto task = createStandardCheckTask(property); |
|||
|
|||
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(exploredMdp, task)); |
|||
if (res) { |
|||
values = std::move(res->asExplicitQuantitativeCheckResult<ValueType>().getValueVector()); |
|||
STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(lowerValueBounds, values, std::less_equal<ValueType>()), "Computed values are smaller than the lower bound."); |
|||
STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(upperValueBounds, values, std::greater_equal<ValueType>()), "Computed values are larger than the upper bound."); |
|||
} else { |
|||
STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!"); |
|||
STORM_LOG_ERROR("No result obtained while checking."); |
|||
} |
|||
status = Status::ModelChecked; |
|||
} |
|||
|
|||
bool hasComputedValues() const { |
|||
return status == Status::ModelChecked; |
|||
} |
|||
|
|||
std::vector<ValueType> const& getValuesOfExploredMdp() const { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
return values; |
|||
} |
|||
|
|||
ValueType const& getComputedValueAtInitialState() const { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored."); |
|||
return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)]; |
|||
} |
|||
|
|||
MdpStateType getBeliefId(MdpStateType exploredMdpState) const { |
|||
STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); |
|||
return mdpStateToBeliefIdMap[exploredMdpState]; |
|||
} |
|||
|
|||
struct SuccessorObservationInformation { |
|||
SuccessorObservationInformation(ValueType const& obsProb, ValueType const& maxProb, uint64_t const& count) : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) { |
|||
// Intentionally left empty. |
|||
} |
|||
|
|||
void join(SuccessorObservationInformation other) { /// Does not join support (for performance reasons) |
|||
observationProbability += other.observationProbability; |
|||
maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs); |
|||
successorWithObsCount += other.successorWithObsCount; |
|||
} |
|||
|
|||
ValueType observationProbability; /// The probability we move to the corresponding observation. |
|||
ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation. |
|||
uint64_t successorWithObsCount; /// The number of successor beliefstates with this observation |
|||
typename BeliefManagerType::BeliefSupportType support; |
|||
}; |
|||
|
|||
void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); |
|||
uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; |
|||
gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); |
|||
|
|||
} |
|||
|
|||
void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation>& gatheredSuccessorObservations) { |
|||
STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before"); |
|||
for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { |
|||
auto const& beliefId = getBeliefId(entry.getColumn()); |
|||
if (beliefId != beliefManager->noId()) { |
|||
auto const& obs = beliefManager->getBeliefObservation(beliefId); |
|||
SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1); |
|||
auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info); |
|||
if (!obsInsertion.second) { |
|||
// There already is an entry for this observation, so join the two informations |
|||
obsInsertion.first->second.join(info); |
|||
} |
|||
beliefManager->joinSupport(beliefId, obsInsertion.first->second.support); |
|||
} |
|||
} |
|||
} |
|||
|
|||
bool currentStateHasSuccessorObservationInObservationSet(uint64_t localActionIndex, storm::storage::BitVector const& observationSet) { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); |
|||
uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; |
|||
for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { |
|||
auto const& beliefId = getBeliefId(entry.getColumn()); |
|||
if (observationSet.get(beliefManager->getBeliefObservation(beliefId))) { |
|||
return true; |
|||
} |
|||
} |
|||
return false; |
|||
} |
|||
|
|||
void takeCurrentValuesAsUpperBounds() { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
upperValueBounds = values; |
|||
} |
|||
|
|||
void takeCurrentValuesAsLowerBounds() { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
lowerValueBounds = values; |
|||
} |
|||
|
|||
/*! |
|||
* |
|||
* Computes the set of states that are reachable via a path that is consistent with an optimal MDP scheduler. |
|||
* States that are only reachable via target states will not be in this set. |
|||
* @param ancillaryChoicesEpsilon if the difference of a 1-step value of a choice is only epsilon away from the optimal value, the choice will be included. |
|||
* @param relative if set, we consider the relative difference to detect ancillaryChoices |
|||
*/ |
|||
void computeOptimalChoicesAndReachableMdpStates(ValueType const& ancillaryChoicesEpsilon, bool relativeDifference) { |
|||
STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); |
|||
STORM_LOG_ASSERT(exploredMdp, "Method call is invalid in if no MDP is available."); |
|||
STORM_LOG_ASSERT(!optimalChoices.is_initialized(), "Tried to compute optimal scheduler but this has already been done before."); |
|||
STORM_LOG_ASSERT(!optimalChoicesReachableMdpStates.is_initialized(), "Tried to compute states that are reachable under an optimal scheduler but this has already been done before."); |
|||
|
|||
// First find the choices that are optimal |
|||
optimalChoices = storm::storage::BitVector(exploredMdp->getNumberOfChoices(), false); |
|||
auto const& choiceIndices = exploredMdp->getNondeterministicChoiceIndices(); |
|||
auto const& transitions = exploredMdp->getTransitionMatrix(); |
|||
auto const& targetStates = exploredMdp->getStates("target"); |
|||
for (uint64_t mdpState = 0; mdpState < exploredMdp->getNumberOfStates(); ++mdpState) { |
|||
if (targetStates.get(mdpState)) { |
|||
// Target states can be skipped. |
|||
continue; |
|||
} else { |
|||
auto const& stateValue = values[mdpState]; |
|||
for (uint64_t globalChoice = choiceIndices[mdpState]; globalChoice < choiceIndices[mdpState + 1]; ++globalChoice) { |
|||
ValueType choiceValue = transitions.multiplyRowWithVector(globalChoice, values); |
|||
if (exploredMdp->hasRewardModel()) { |
|||
choiceValue += exploredMdp->getUniqueRewardModel().getStateActionReward(globalChoice); |
|||
} |
|||
ValueType absDiff = storm::utility::abs<ValueType>((choiceValue - stateValue)); |
|||
if ((relativeDifference && absDiff <= ancillaryChoicesEpsilon * stateValue) || (!relativeDifference && absDiff <= ancillaryChoicesEpsilon)) { |
|||
optimalChoices->set(globalChoice, true); |
|||
} |
|||
} |
|||
STORM_LOG_ASSERT(optimalChoices->getNextSetIndex(choiceIndices[mdpState]) < optimalChoices->size(), "Could not find an optimal choice."); |
|||
} |
|||
} |
|||
|
|||
// Then, find the states that are reachable via these choices |
|||
optimalChoicesReachableMdpStates = storm::utility::graph::getReachableStates(transitions, exploredMdp->getInitialStates(), ~targetStates, targetStates, false, 0, optimalChoices.get()); |
|||
} |
|||
|
|||
private: |
|||
MdpStateType noState() const { |
|||
return std::numeric_limits<MdpStateType>::max(); |
|||
} |
|||
|
|||
std::shared_ptr<storm::logic::Formula const> createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) { |
|||
std::string propertyString = computeRewards ? "R" : "P"; |
|||
propertyString += storm::solver::minimize(dir) ? "min" : "max"; |
|||
propertyString += "=? [F \"target\"]"; |
|||
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString); |
|||
return storm::api::extractFormulasFromProperties(propertyVector).front(); |
|||
} |
|||
|
|||
storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property) { |
|||
//Note: The property should not run out of scope after calling this because the task only stores the property by reference. |
|||
// Therefore, this method needs the property by reference (and not const reference) |
|||
auto task = storm::api::createTask<ValueType>(property, false); |
|||
auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>(); |
|||
hint.setResultHint(values); |
|||
auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint); |
|||
task.setHint(hintPtr); |
|||
return task; |
|||
} |
|||
|
|||
MdpStateType getCurrentMdpState() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return currentMdpState; |
|||
} |
|||
|
|||
MdpStateType getCurrentBeliefId() const { |
|||
STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); |
|||
return getBeliefId(getCurrentMdpState()); |
|||
} |
|||
|
|||
void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) { |
|||
STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(), "Skipped at least one row."); |
|||
if (row == exploredMdpTransitions.size()) { |
|||
exploredMdpTransitions.emplace_back(); |
|||
} |
|||
STORM_LOG_ASSERT(exploredMdpTransitions[row].count(column) == 0, "Trying to insert multiple transitions to the same state."); |
|||
exploredMdpTransitions[row][column] = value; |
|||
} |
|||
|
|||
void internalAddRowGroupIndex() { |
|||
exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices()); |
|||
} |
|||
|
|||
MdpStateType getExploredMdpState(BeliefId const& beliefId) const { |
|||
if (beliefId < exploredBeliefIds.size() && exploredBeliefIds.get(beliefId)) { |
|||
return beliefIdToMdpStateMap.at(beliefId); |
|||
} else { |
|||
return noState(); |
|||
} |
|||
} |
|||
|
|||
void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) { |
|||
lowerValueBounds.push_back(lowerBound); |
|||
upperValueBounds.push_back(upperBound); |
|||
// Take the middle value as a hint |
|||
values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType, uint64_t>(2)); |
|||
STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have different size then number of available states."); |
|||
STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size."); |
|||
} |
|||
|
|||
MdpStateType getOrAddMdpState(BeliefId const& beliefId) { |
|||
exploredBeliefIds.grow(beliefId + 1, false); |
|||
if (exploredBeliefIds.get(beliefId)) { |
|||
return beliefIdToMdpStateMap[beliefId]; |
|||
} else { |
|||
// This state needs exploration |
|||
exploredBeliefIds.set(beliefId, true); |
|||
|
|||
// If this is a restart of the exploration, we still might have an MDP state for the belief |
|||
if (exploredMdp) { |
|||
auto findRes = beliefIdToMdpStateMap.find(beliefId); |
|||
if (findRes != beliefIdToMdpStateMap.end()) { |
|||
mdpStatesToExplore.push_back(findRes->second); |
|||
return findRes->second; |
|||
} |
|||
} |
|||
// At this point we need to add a new MDP state |
|||
MdpStateType result = getCurrentNumberOfMdpStates(); |
|||
assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size()); |
|||
mdpStateToBeliefIdMap.push_back(beliefId); |
|||
beliefIdToMdpStateMap[beliefId] = result; |
|||
insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId)); |
|||
mdpStatesToExplore.push_back(result); |
|||
return result; |
|||
} |
|||
} |
|||
|
|||
// Belief state related information |
|||
std::shared_ptr<BeliefManagerType> beliefManager; |
|||
std::vector<BeliefId> mdpStateToBeliefIdMap; |
|||
std::map<BeliefId, MdpStateType> beliefIdToMdpStateMap; |
|||
storm::storage::BitVector exploredBeliefIds; |
|||
|
|||
// Exploration information |
|||
std::deque<uint64_t> mdpStatesToExplore; |
|||
std::vector<std::map<MdpStateType, ValueType>> exploredMdpTransitions; |
|||
std::vector<MdpStateType> exploredChoiceIndices; |
|||
std::vector<ValueType> mdpActionRewards; |
|||
uint64_t currentMdpState; |
|||
|
|||
// Special states and choices during exploration |
|||
boost::optional<MdpStateType> extraTargetState; |
|||
boost::optional<MdpStateType> extraBottomState; |
|||
storm::storage::BitVector targetStates; |
|||
storm::storage::BitVector truncatedStates; |
|||
MdpStateType initialMdpState; |
|||
storm::storage::BitVector delayedExplorationChoices; |
|||
|
|||
// Final Mdp |
|||
std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp; |
|||
|
|||
// Value and scheduler related information |
|||
storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> pomdpValueBounds; |
|||
std::vector<ValueType> lowerValueBounds; |
|||
std::vector<ValueType> upperValueBounds; |
|||
std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has performed |
|||
boost::optional<storm::storage::BitVector> optimalChoices; |
|||
boost::optional<storm::storage::BitVector> optimalChoicesReachableMdpStates; |
|||
|
|||
// The current status of this explorer |
|||
Status status; |
|||
}; |
|||
} |
|||
} |
|||
@ -0,0 +1,45 @@ |
|||
#pragma once |
|||
|
|||
#include <boost/optional.hpp> |
|||
#include "storm/utility/constants.h" |
|||
#include "storm/utility/NumberTraits.h" |
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
namespace modelchecker { |
|||
template<typename ValueType> |
|||
struct ApproximatePOMDPModelCheckerOptions { |
|||
ApproximatePOMDPModelCheckerOptions(bool discretize, bool unfold) : discretize(discretize), unfold(unfold) { |
|||
// Intentionally left empty |
|||
} |
|||
|
|||
bool discretize; |
|||
bool unfold; |
|||
bool refine = false; |
|||
boost::optional<uint64_t> refineStepLimit; |
|||
ValueType refinePrecision = storm::utility::zero<ValueType>(); |
|||
boost::optional<uint64_t> explorationTimeLimit; |
|||
|
|||
// Controlparameters for the refinement heuristic |
|||
// Discretization Resolution |
|||
uint64_t resolutionInit = 2; |
|||
ValueType resolutionFactor = storm::utility::convertNumber<ValueType, uint64_t>(2); |
|||
// The maximal number of newly expanded MDP states in a refinement step |
|||
uint64_t sizeThresholdInit = 0; |
|||
ValueType sizeThresholdFactor = storm::utility::convertNumber<ValueType,uint64_t>(4); |
|||
// Controls how large the gap between known lower- and upper bounds at a beliefstate needs to be in order to explore |
|||
ValueType gapThresholdInit = storm::utility::convertNumber<ValueType>(0.1); |
|||
ValueType gapThresholdFactor = storm::utility::convertNumber<ValueType>(0.25); |
|||
// Controls whether "almost optimal" choices will be considered optimal |
|||
ValueType optimalChoiceValueThresholdInit = storm::utility::convertNumber<ValueType>(1e-3); |
|||
ValueType optimalChoiceValueThresholdFactor = storm::utility::one<ValueType>(); |
|||
// Controls which observations are refined. |
|||
ValueType obsThresholdInit = storm::utility::convertNumber<ValueType>(0.1); |
|||
ValueType obsThresholdIncrementFactor = storm::utility::convertNumber<ValueType>(0.1); |
|||
|
|||
ValueType numericPrecision = storm::NumberTraits<ValueType>::IsExact ? storm::utility::zero<ValueType>() : storm::utility::convertNumber<ValueType>(1e-9); /// Used to decide whether two beliefs are equal |
|||
bool dynamicTriangulation = true; // Sets whether the triangulation is done in a dynamic way (yielding more precise triangulations) |
|||
}; |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,839 @@ |
|||
#include "ApproximatePOMDPModelchecker.h"
|
|||
|
|||
#include <tuple>
|
|||
|
|||
#include <boost/algorithm/string.hpp>
|
|||
|
|||
#include "storm-pomdp/analysis/FormulaInformation.h"
|
|||
#include "storm-pomdp/analysis/FiniteBeliefMdpDetection.h"
|
|||
|
|||
#include "storm/utility/ConstantsComparator.h"
|
|||
#include "storm/utility/NumberTraits.h"
|
|||
#include "storm/utility/graph.h"
|
|||
#include "storm/logic/Formulas.h"
|
|||
|
|||
#include "storm/models/sparse/Dtmc.h"
|
|||
#include "storm/models/sparse/StandardRewardModel.h"
|
|||
#include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h"
|
|||
#include "storm/utility/vector.h"
|
|||
#include "storm/api/properties.h"
|
|||
#include "storm/api/export.h"
|
|||
#include "storm-pomdp/builder/BeliefMdpExplorer.h"
|
|||
#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h"
|
|||
|
|||
#include "storm/utility/macros.h"
|
|||
#include "storm/utility/SignalHandler.h"
|
|||
#include "storm/exceptions/NotSupportedException.h"
|
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
namespace modelchecker { |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) { |
|||
// Intentionally left empty
|
|||
} |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ValueType |
|||
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::diff(bool relative) const { |
|||
ValueType diff = upperBound - lowerBound; |
|||
if (diff < storm::utility::zero<ValueType>()) { |
|||
STORM_LOG_WARN_COND(diff >= 1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << "."); |
|||
diff = storm::utility::zero<ValueType >(); |
|||
} |
|||
if (relative && !storm::utility::isZero(upperBound)) { |
|||
diff /= upperBound; |
|||
} |
|||
return diff; |
|||
} |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
bool ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::updateLowerBound(ValueType const& value) { |
|||
if (value > lowerBound) { |
|||
lowerBound = value; |
|||
return true; |
|||
} |
|||
return false; |
|||
} |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
bool ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result::updateUpperBound(ValueType const& value) { |
|||
if (value < upperBound) { |
|||
upperBound = value; |
|||
return true; |
|||
} |
|||
return false; |
|||
} |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { |
|||
// intentionally left empty;
|
|||
} |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) { |
|||
cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(this->options.numericPrecision), false); |
|||
} |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::Result ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::check(storm::logic::Formula const& formula) { |
|||
STORM_LOG_ASSERT(options.unfold || options.discretize, "Invoked belief exploration but no task (unfold or discretize) given."); |
|||
// Reset all collected statistics
|
|||
statistics = Statistics(); |
|||
statistics.totalTime.start(); |
|||
// Extract the relevant information from the formula
|
|||
auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); |
|||
|
|||
// Compute some initial bounds on the values for each state of the pomdp
|
|||
auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker<storm::models::sparse::Pomdp<ValueType>>(pomdp).getValueBounds(formula, formulaInfo); |
|||
uint64_t initialPomdpState = pomdp.getInitialStates().getNextSetIndex(0); |
|||
Result result(initialPomdpValueBounds.getHighestLowerBound(initialPomdpState), initialPomdpValueBounds.getSmallestUpperBound(initialPomdpState)); |
|||
STORM_PRINT_AND_LOG("Initial value bounds are [" << result.lowerBound << ", " << result.upperBound << "]" << std::endl); |
|||
|
|||
boost::optional<std::string> rewardModelName; |
|||
if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) { |
|||
// FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
|
|||
STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); |
|||
if (formulaInfo.isNonNestedReachabilityProbability()) { |
|||
if (!formulaInfo.getSinkStates().empty()) { |
|||
auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states); |
|||
reachableFromSinkStates &= ~formulaInfo.getSinkStates().states; |
|||
STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported"); |
|||
} |
|||
} else { |
|||
// Expected reward formula!
|
|||
rewardModelName = formulaInfo.getRewardModelName(); |
|||
} |
|||
} else { |
|||
STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); |
|||
} |
|||
if (storm::pomdp::detectFiniteBeliefMdp(pomdp, formulaInfo.getTargetStates().states)) { |
|||
STORM_PRINT_AND_LOG("Detected that the belief MDP is finite." << std::endl); |
|||
} |
|||
|
|||
if (options.refine) { |
|||
refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds, result); |
|||
} else { |
|||
computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds, result); |
|||
} |
|||
// "clear" results in case they were actually not requested (this will make the output a bit more clear)
|
|||
if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) { |
|||
result.lowerBound = -storm::utility::infinity<ValueType>(); |
|||
} |
|||
if ((formulaInfo.maximize() && !options.discretize) || (formulaInfo.minimize() && !options.unfold)) { |
|||
result.upperBound = storm::utility::infinity<ValueType>(); |
|||
} |
|||
|
|||
if (storm::utility::resources::isTerminate()) { |
|||
statistics.aborted = true; |
|||
} |
|||
statistics.totalTime.stop(); |
|||
return result; |
|||
} |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::printStatisticsToStream(std::ostream& stream) const { |
|||
stream << "##### Grid Approximation Statistics ######" << std::endl; |
|||
stream << "# Input model: " << std::endl; |
|||
pomdp.printModelInformationToStream(stream); |
|||
stream << "# Max. Number of states with same observation: " << pomdp.getMaxNrStatesWithSameObservation() << std::endl; |
|||
|
|||
if (statistics.aborted) { |
|||
stream << "# Computation aborted early" << std::endl; |
|||
} |
|||
|
|||
stream << "# Total check time: " << statistics.totalTime << std::endl; |
|||
|
|||
// Refinement information:
|
|||
if (statistics.refinementSteps) { |
|||
stream << "# Number of refinement steps: " << statistics.refinementSteps.get() << std::endl; |
|||
} |
|||
|
|||
// The overapproximation MDP:
|
|||
if (statistics.overApproximationStates) { |
|||
stream << "# Number of states in the "; |
|||
if (options.refine) { |
|||
stream << "final "; |
|||
} |
|||
stream << "grid MDP for the over-approximation: "; |
|||
if (statistics.overApproximationBuildAborted) { |
|||
stream << ">="; |
|||
} |
|||
stream << statistics.overApproximationStates.get() << std::endl; |
|||
stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.get() << std::endl; |
|||
stream << "# Time spend for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << std::endl; |
|||
stream << "# Time spend for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << std::endl; |
|||
} |
|||
|
|||
// The underapproximation MDP:
|
|||
if (statistics.underApproximationStates) { |
|||
stream << "# Number of states in the "; |
|||
if (options.refine) { |
|||
stream << "final "; |
|||
} |
|||
stream << "grid MDP for the under-approximation: "; |
|||
if (statistics.underApproximationBuildAborted) { |
|||
stream << ">="; |
|||
} |
|||
stream << statistics.underApproximationStates.get() << std::endl; |
|||
if (statistics.underApproximationStateLimit) { |
|||
stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.get() << std::endl; |
|||
} |
|||
stream << "# Time spend for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << std::endl; |
|||
stream << "# Time spend for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << std::endl; |
|||
} |
|||
|
|||
stream << "##########################################" << std::endl; |
|||
} |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result) { |
|||
|
|||
if (options.discretize) { |
|||
std::vector<BeliefValueType> observationResolutionVector(pomdp.getNrObservations(), storm::utility::convertNumber<BeliefValueType>(options.resolutionInit)); |
|||
auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); |
|||
if (rewardModelName) { |
|||
manager->setRewardModel(rewardModelName); |
|||
} |
|||
auto approx = std::make_shared<ExplorerType>(manager, pomdpValueBounds); |
|||
HeuristicParameters heuristicParameters; |
|||
heuristicParameters.gapThreshold = options.gapThresholdInit; |
|||
heuristicParameters.observationThreshold = options.obsThresholdInit; // Actually not relevant without refinement
|
|||
heuristicParameters.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit; |
|||
heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; |
|||
|
|||
buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, observationResolutionVector, manager, approx); |
|||
if (approx->hasComputedValues()) { |
|||
STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n"); |
|||
approx->getExploredMdp()->printModelInformationToStream(std::cout); |
|||
ValueType& resultValue = min ? result.lowerBound : result.upperBound; |
|||
resultValue = approx->getComputedValueAtInitialState(); |
|||
} |
|||
} |
|||
if (options.unfold) { // Underapproximation (uses a fresh Belief manager)
|
|||
auto manager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); |
|||
if (rewardModelName) { |
|||
manager->setRewardModel(rewardModelName); |
|||
} |
|||
auto approx = std::make_shared<ExplorerType>(manager, pomdpValueBounds); |
|||
HeuristicParameters heuristicParameters; |
|||
heuristicParameters.gapThreshold = options.gapThresholdInit; |
|||
heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; |
|||
heuristicParameters.sizeThreshold = options.sizeThresholdInit; |
|||
if (heuristicParameters.sizeThreshold == 0) { |
|||
if (options.explorationTimeLimit) { |
|||
heuristicParameters.sizeThreshold = std::numeric_limits<uint64_t>::max(); |
|||
} else { |
|||
heuristicParameters.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); |
|||
STORM_PRINT_AND_LOG("Heuristically selected an under-approximation mdp size threshold of " << heuristicParameters.sizeThreshold << "." << std::endl); |
|||
} |
|||
} |
|||
buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, manager, approx); |
|||
if (approx->hasComputedValues()) { |
|||
STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); |
|||
approx->getExploredMdp()->printModelInformationToStream(std::cout); |
|||
ValueType& resultValue = min ? result.upperBound : result.lowerBound; |
|||
resultValue = approx->getComputedValueAtInitialState(); |
|||
} |
|||
} |
|||
} |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result) { |
|||
statistics.refinementSteps = 0; |
|||
|
|||
// Set up exploration data
|
|||
std::vector<BeliefValueType> observationResolutionVector; |
|||
std::shared_ptr<BeliefManagerType> overApproxBeliefManager; |
|||
std::shared_ptr<ExplorerType> overApproximation; |
|||
HeuristicParameters overApproxHeuristicPar; |
|||
if (options.discretize) { // Setup and build first OverApproximation
|
|||
observationResolutionVector = std::vector<BeliefValueType>(pomdp.getNrObservations(), storm::utility::convertNumber<BeliefValueType>(options.resolutionInit)); |
|||
overApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); |
|||
if (rewardModelName) { |
|||
overApproxBeliefManager->setRewardModel(rewardModelName); |
|||
} |
|||
overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, pomdpValueBounds); |
|||
overApproxHeuristicPar.gapThreshold = options.gapThresholdInit; |
|||
overApproxHeuristicPar.observationThreshold = options.obsThresholdInit; |
|||
overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit; |
|||
overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; |
|||
buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); |
|||
if (!overApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) { |
|||
return; |
|||
} |
|||
ValueType const& newValue = overApproximation->getComputedValueAtInitialState(); |
|||
bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue); |
|||
if (betterBound) { |
|||
STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl); |
|||
} |
|||
} |
|||
|
|||
std::shared_ptr<BeliefManagerType> underApproxBeliefManager; |
|||
std::shared_ptr<ExplorerType> underApproximation; |
|||
HeuristicParameters underApproxHeuristicPar; |
|||
if (options.unfold) { // Setup and build first UnderApproximation
|
|||
underApproxBeliefManager = std::make_shared<BeliefManagerType>(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); |
|||
if (rewardModelName) { |
|||
underApproxBeliefManager->setRewardModel(rewardModelName); |
|||
} |
|||
underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, pomdpValueBounds); |
|||
underApproxHeuristicPar.gapThreshold = options.gapThresholdInit; |
|||
underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; |
|||
underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit; |
|||
if (underApproxHeuristicPar.sizeThreshold == 0) { |
|||
// Select a decent value automatically
|
|||
underApproxHeuristicPar.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); |
|||
} |
|||
buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); |
|||
if (!underApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) { |
|||
return; |
|||
} |
|||
ValueType const& newValue = underApproximation->getComputedValueAtInitialState(); |
|||
bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue); |
|||
if (betterBound) { |
|||
STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl); |
|||
} |
|||
} |
|||
|
|||
// Do some output
|
|||
STORM_PRINT_AND_LOG("Completed iteration #" << statistics.refinementSteps.get() << ". Current checktime is " << statistics.totalTime << "."); |
|||
bool computingLowerBound = false; |
|||
bool computingUpperBound = false; |
|||
if (options.discretize) { |
|||
STORM_PRINT_AND_LOG(" Over-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << "."); |
|||
(min ? computingLowerBound : computingUpperBound) = true; |
|||
} |
|||
if (options.unfold) { |
|||
STORM_PRINT_AND_LOG(" Under-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << "."); |
|||
(min ? computingUpperBound : computingLowerBound) = true; |
|||
} |
|||
if (computingLowerBound && computingUpperBound) { |
|||
STORM_PRINT_AND_LOG(" Current result is [" << result.lowerBound << ", " << result.upperBound << "]."); |
|||
} else if (computingLowerBound) { |
|||
STORM_PRINT_AND_LOG(" Current result is ≥" << result.lowerBound << "."); |
|||
} else if (computingUpperBound) { |
|||
STORM_PRINT_AND_LOG(" Current result is ≤" << result.upperBound << "."); |
|||
} |
|||
STORM_PRINT_AND_LOG(std::endl); |
|||
|
|||
// Start refinement
|
|||
STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || !storm::utility::isZero(options.refinePrecision), "No termination criterion for refinement given. Consider to specify a steplimit, a non-zero precisionlimit, or a timeout"); |
|||
STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined."); |
|||
while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps.get() < options.refineStepLimit.get()) && result.diff() > options.refinePrecision) { |
|||
bool overApproxFixPoint = true; |
|||
bool underApproxFixPoint = true; |
|||
if (options.discretize) { |
|||
// Refine over-approximation
|
|||
if (min) { |
|||
overApproximation->takeCurrentValuesAsLowerBounds(); |
|||
} else { |
|||
overApproximation->takeCurrentValuesAsUpperBounds(); |
|||
} |
|||
overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor; |
|||
overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(storm::utility::convertNumber<ValueType, uint64_t>(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor); |
|||
overApproxHeuristicPar.observationThreshold += options.obsThresholdIncrementFactor * (storm::utility::one<ValueType>() - overApproxHeuristicPar.observationThreshold); |
|||
overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; |
|||
overApproxFixPoint = buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); |
|||
if (overApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) { |
|||
ValueType const& newValue = overApproximation->getComputedValueAtInitialState(); |
|||
bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue); |
|||
if (betterBound) { |
|||
STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " in refinement step #" << (statistics.refinementSteps.get() + 1) << ". New value is '" << newValue << "'." << std::endl); |
|||
} |
|||
} else { |
|||
break; |
|||
} |
|||
} |
|||
|
|||
if (options.unfold && result.diff() > options.refinePrecision) { |
|||
// Refine under-approximation
|
|||
underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor; |
|||
underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(storm::utility::convertNumber<ValueType, uint64_t>(underApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor); |
|||
underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; |
|||
underApproxFixPoint = buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); |
|||
if (underApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) { |
|||
ValueType const& newValue = underApproximation->getComputedValueAtInitialState(); |
|||
bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue); |
|||
if (betterBound) { |
|||
STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " in refinement step #" << (statistics.refinementSteps.get() + 1) << ". New value is '" << newValue << "'." << std::endl); |
|||
} |
|||
} else { |
|||
break; |
|||
} |
|||
} |
|||
|
|||
if (storm::utility::resources::isTerminate()) { |
|||
break; |
|||
} else { |
|||
++statistics.refinementSteps.get(); |
|||
// Don't make too many outputs (to avoid logfile clutter)
|
|||
if (statistics.refinementSteps.get() <= 1000) { |
|||
STORM_PRINT_AND_LOG("Completed iteration #" << statistics.refinementSteps.get() << ". Current checktime is " << statistics.totalTime << "."); |
|||
bool computingLowerBound = false; |
|||
bool computingUpperBound = false; |
|||
if (options.discretize) { |
|||
STORM_PRINT_AND_LOG(" Over-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << "."); |
|||
(min ? computingLowerBound : computingUpperBound) = true; |
|||
} |
|||
if (options.unfold) { |
|||
STORM_PRINT_AND_LOG(" Under-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << "."); |
|||
(min ? computingUpperBound : computingLowerBound) = true; |
|||
} |
|||
if (computingLowerBound && computingUpperBound) { |
|||
STORM_PRINT_AND_LOG(" Current result is [" << result.lowerBound << ", " << result.upperBound << "]."); |
|||
} else if (computingLowerBound) { |
|||
STORM_PRINT_AND_LOG(" Current result is ≥" << result.lowerBound << "."); |
|||
} else if (computingUpperBound) { |
|||
STORM_PRINT_AND_LOG(" Current result is ≤" << result.upperBound << "."); |
|||
} |
|||
STORM_PRINT_AND_LOG(std::endl); |
|||
STORM_LOG_WARN_COND(statistics.refinementSteps.get() < 1000, "Refinement requires more than 1000 iterations."); |
|||
} |
|||
} |
|||
if (overApproxFixPoint && underApproxFixPoint) { |
|||
STORM_PRINT_AND_LOG("Refinement fixpoint reached after " << statistics.refinementSteps.get() << " iterations." << std::endl); |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/*!
|
|||
* Heuristically rates the quality of the approximation described by the given successor observation info. |
|||
* Here, 0 means a bad approximation and 1 means a good approximation. |
|||
*/ |
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
BeliefValueType ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution) { |
|||
auto n = storm::utility::convertNumber<BeliefValueType, uint64_t>(info.support.size()); |
|||
auto one = storm::utility::one<BeliefValueType>(); |
|||
if (storm::utility::isOne(n)) { |
|||
// If the belief is Dirac, it has to be approximated precisely.
|
|||
// In this case, we return the best possible rating
|
|||
return one; |
|||
} else { |
|||
// Create the rating for this observation at this choice from the given info
|
|||
BeliefValueType obsChoiceRating = storm::utility::convertNumber<BeliefValueType, ValueType>(info.maxProbabilityToSuccessorWithObs / info.observationProbability); |
|||
// At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1
|
|||
// Normalize the rating so that it ranges from 0 to 1, where
|
|||
// 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
|
|||
obsChoiceRating = (obsChoiceRating * n - one) / (n - one); |
|||
// Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
|
|||
obsChoiceRating *= observationResolution / maxResolution; |
|||
return obsChoiceRating; |
|||
} |
|||
} |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
std::vector<BeliefValueType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<BeliefValueType> const& observationResolutionVector) { |
|||
uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates(); |
|||
auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices(); |
|||
BeliefValueType maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); |
|||
|
|||
std::vector<BeliefValueType> resultingRatings(pomdp.getNrObservations(), storm::utility::one<BeliefValueType>()); |
|||
|
|||
std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
|
|||
for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) { |
|||
// Check whether this state is reached under an optimal scheduler.
|
|||
// The heuristic assumes that the remaining states are not relevant for the observation score.
|
|||
if (overApproximation->stateIsOptimalSchedulerReachable(mdpState)) { |
|||
for (uint64_t mdpChoice = choiceIndices[mdpState]; mdpChoice < choiceIndices[mdpState + 1]; ++mdpChoice) { |
|||
// Similarly, only optimal actions are relevant
|
|||
if (overApproximation->actionIsOptimal(mdpChoice)) { |
|||
// score the observations for this choice
|
|||
gatheredSuccessorObservations.clear(); |
|||
overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); |
|||
for (auto const& obsInfo : gatheredSuccessorObservations) { |
|||
auto const& obs = obsInfo.first; |
|||
BeliefValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution); |
|||
|
|||
// The rating of the observation will be the minimum over all choice-based observation ratings
|
|||
resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
} |
|||
return resultingRatings; |
|||
} |
|||
|
|||
template<typename ValueType> |
|||
ValueType getGap(ValueType const& l, ValueType const& u) { |
|||
STORM_LOG_ASSERT(l >= storm::utility::zero<ValueType>() && u >= storm::utility::zero<ValueType>(), "Gap computation currently does not handle negative values."); |
|||
if (storm::utility::isInfinity(u)) { |
|||
if (storm::utility::isInfinity(l)) { |
|||
return storm::utility::zero<ValueType>(); |
|||
} else { |
|||
return u; |
|||
} |
|||
} else if (storm::utility::isZero(u)) { |
|||
STORM_LOG_ASSERT(storm::utility::isZero(l), "Upper bound is zero but lower bound is " << l << "."); |
|||
return u; |
|||
} else { |
|||
STORM_LOG_ASSERT(!storm::utility::isInfinity(l), "Lower bound is infinity, but upper bound is " << u << "."); |
|||
// get the relative gap
|
|||
return storm::utility::abs<ValueType>(u-l) * storm::utility::convertNumber<ValueType, uint64_t>(2) / (l+u); |
|||
} |
|||
} |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
bool ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<BeliefValueType>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) { |
|||
|
|||
// Detect whether the refinement reached a fixpoint.
|
|||
bool fixPoint = true; |
|||
|
|||
statistics.overApproximationBuildTime.start(); |
|||
storm::storage::BitVector refinedObservations; |
|||
if (!refine) { |
|||
// If we build the model from scratch, we first have to setup the explorer for the overApproximation.
|
|||
if (computeRewards) { |
|||
overApproximation->startNewExploration(storm::utility::zero<ValueType>()); |
|||
} else { |
|||
overApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>()); |
|||
} |
|||
} else { |
|||
// If we refine the existing overApproximation, our heuristic also wants to know which states are reachable under an optimal policy
|
|||
overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true); |
|||
// We also need to find out which observation resolutions needs refinement.
|
|||
// current maximal resolution (needed for refinement heuristic)
|
|||
auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector); |
|||
// If there is a score < 1, we have not reached a fixpoint, yet
|
|||
BeliefValueType numericPresicion = storm::utility::convertNumber<BeliefValueType>(options.numericPrecision); |
|||
if (std::any_of(obsRatings.begin(), obsRatings.end(), [&numericPresicion](ValueType const& value){return value + numericPresicion < storm::utility::one<ValueType>();})) { |
|||
STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because there are still observations to refine."); |
|||
fixPoint = false; |
|||
} |
|||
refinedObservations = storm::utility::vector::filter<ValueType>(obsRatings, [&heuristicParameters](ValueType const& r) { return r <= heuristicParameters.observationThreshold;}); |
|||
STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); |
|||
for (auto const& obs : refinedObservations) { |
|||
// Increment the resolution at the refined observations.
|
|||
// Use storm's rational number to detect overflows properly.
|
|||
storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber<storm::RationalNumber>(observationResolutionVector[obs]) * storm::utility::convertNumber<storm::RationalNumber>(options.resolutionFactor); |
|||
static_assert(storm::NumberTraits<BeliefValueType>::IsExact || std::is_same<BeliefValueType, double>::value, "Unhandled belief value type"); |
|||
if (!storm::NumberTraits<BeliefValueType>::IsExact && newObsResolutionAsRational > storm::utility::convertNumber<storm::RationalNumber>(std::numeric_limits<double>::max())) { |
|||
observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(std::numeric_limits<double>::max()); |
|||
} else { |
|||
observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(newObsResolutionAsRational); |
|||
} |
|||
} |
|||
overApproximation->restartExploration(); |
|||
} |
|||
statistics.overApproximationMaxResolution = storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end())); |
|||
|
|||
// Start exploration
|
|||
storm::utility::Stopwatch explorationTime; |
|||
if (options.explorationTimeLimit) { |
|||
explorationTime.start(); |
|||
} |
|||
bool timeLimitExceeded = false; |
|||
std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
|
|||
uint64_t numRewiredOrExploredStates = 0; |
|||
while (overApproximation->hasUnexploredState()) { |
|||
if (!timeLimitExceeded && options.explorationTimeLimit && static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) { |
|||
STORM_LOG_INFO("Exploration time limit exceeded."); |
|||
timeLimitExceeded = true; |
|||
STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because the exploration time limit is exceeded."); |
|||
fixPoint = false; |
|||
} |
|||
|
|||
uint64_t currId = overApproximation->exploreNextState(); |
|||
bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior(); |
|||
if (!hasOldBehavior) { |
|||
STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a new state is explored"); |
|||
fixPoint = false; // Exploring a new state!
|
|||
} |
|||
uint32_t currObservation = beliefManager->getBeliefObservation(currId); |
|||
if (targetObservations.count(currObservation) != 0) { |
|||
overApproximation->setCurrentStateIsTarget(); |
|||
overApproximation->addSelfloopTransition(); |
|||
} else { |
|||
// We need to decide how to treat this state (and each individual enabled action). There are the following cases:
|
|||
// 1 The state has no old behavior and
|
|||
// 1.1 we explore all actions or
|
|||
// 1.2 we truncate all actions
|
|||
// 2 The state has old behavior and was truncated in the last iteration and
|
|||
// 2.1 we explore all actions or
|
|||
// 2.2 we truncate all actions (essentially restoring old behavior, but we do the truncation step again to benefit from updated bounds)
|
|||
// 3 The state has old behavior and was not truncated in the last iteration and the current action
|
|||
// 3.1 should be rewired or
|
|||
// 3.2 should get the old behavior but either
|
|||
// 3.2.1 none of the successor observation has been refined since the last rewiring or exploration of this action
|
|||
// 3.2.2 rewiring is only delayed as it could still have an effect in a later refinement step
|
|||
|
|||
// Find out in which case we are
|
|||
bool exploreAllActions = false; |
|||
bool truncateAllActions = false; |
|||
bool restoreAllActions = false; |
|||
bool checkRewireForAllActions = false; |
|||
// Get the relative gap
|
|||
ValueType gap = getGap(overApproximation->getLowerValueBoundAtCurrentState(), overApproximation->getUpperValueBoundAtCurrentState()); |
|||
if (!hasOldBehavior) { |
|||
// Case 1
|
|||
// If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that reaches this state
|
|||
if (!timeLimitExceeded && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { |
|||
exploreAllActions = true; // Case 1.1
|
|||
} else { |
|||
truncateAllActions = true; // Case 1.2
|
|||
overApproximation->setCurrentStateIsTruncated(); |
|||
} |
|||
} else { |
|||
if (overApproximation->getCurrentStateWasTruncated()) { |
|||
// Case 2
|
|||
if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { |
|||
exploreAllActions = true; // Case 2.1
|
|||
STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a previously truncated state is now explored."); |
|||
fixPoint = false; |
|||
} else { |
|||
truncateAllActions = true; // Case 2.2
|
|||
overApproximation->setCurrentStateIsTruncated(); |
|||
if (fixPoint) { |
|||
// Properly check whether this can still be a fixpoint
|
|||
if (overApproximation->currentStateIsOptimalSchedulerReachable() && !storm::utility::isZero(gap)) { |
|||
STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we truncate a state with non-zero gap " << gap << " that is reachable via an optimal sched."); |
|||
fixPoint = false; |
|||
} |
|||
//} else {
|
|||
// In this case we truncated a state that is not reachable under optimal schedulers.
|
|||
// If no other state is explored (i.e. fixPoint remaints true), these states should still not be reachable in subsequent iterations
|
|||
} |
|||
} |
|||
} else { |
|||
// Case 3
|
|||
// The decision for rewiring also depends on the corresponding action, but we have some criteria that lead to case 3.2 (independent of the action)
|
|||
if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { |
|||
checkRewireForAllActions = true; // Case 3.1 or Case 3.2
|
|||
} else { |
|||
restoreAllActions = true; // Definitely Case 3.2
|
|||
// We still need to check for each action whether rewiring makes sense later
|
|||
checkRewireForAllActions = true; |
|||
} |
|||
} |
|||
} |
|||
bool expandedAtLeastOneAction = false; |
|||
for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { |
|||
bool expandCurrentAction = exploreAllActions || truncateAllActions; |
|||
if (checkRewireForAllActions) { |
|||
assert(refine); |
|||
// In this case, we still need to check whether this action needs to be expanded
|
|||
assert(!expandCurrentAction); |
|||
// Check the action dependent conditions for rewiring
|
|||
// First, check whether this action has been rewired since the last refinement of one of the successor observations (i.e. whether rewiring would actually change the successor states)
|
|||
assert(overApproximation->currentStateHasOldBehavior()); |
|||
if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) || overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) { |
|||
// Then, check whether the other criteria for rewiring are satisfied
|
|||
if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) { |
|||
// Do the rewiring now! (Case 3.1)
|
|||
expandCurrentAction = true; |
|||
STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we rewire a state."); |
|||
fixPoint = false; |
|||
} else { |
|||
// Delay the rewiring (Case 3.2.2)
|
|||
overApproximation->setCurrentChoiceIsDelayed(action); |
|||
if (fixPoint) { |
|||
// Check whether this delay means that a fixpoint has not been reached
|
|||
if (!overApproximation->getCurrentStateActionExplorationWasDelayed(action) || (overApproximation->currentStateIsOptimalSchedulerReachable() && overApproximation->actionAtCurrentStateWasOptimal(action) && !storm::utility::isZero(gap))) { |
|||
STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we delay a rewiring of a state with non-zero gap " << gap << " that is reachable via an optimal scheduler."); |
|||
fixPoint = false; |
|||
} |
|||
} |
|||
} |
|||
} // else { Case 3.2.1 }
|
|||
} |
|||
|
|||
if (expandCurrentAction) { |
|||
expandedAtLeastOneAction = true; |
|||
if (!truncateAllActions) { |
|||
// Cases 1.1, 2.1, or 3.1
|
|||
auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); |
|||
for (auto const& successor : successorGridPoints) { |
|||
overApproximation->addTransitionToBelief(action, successor.first, successor.second, false); |
|||
} |
|||
if (computeRewards) { |
|||
overApproximation->computeRewardAtCurrentState(action); |
|||
} |
|||
} else { |
|||
// Cases 1.2 or 2.2
|
|||
ValueType truncationProbability = storm::utility::zero<ValueType>(); |
|||
ValueType truncationValueBound = storm::utility::zero<ValueType>(); |
|||
auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); |
|||
for (auto const& successor : successorGridPoints) { |
|||
bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, true); |
|||
if (!added) { |
|||
// We did not explore this successor state. Get a bound on the "missing" value
|
|||
truncationProbability += successor.second; |
|||
truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first) : overApproximation->computeUpperValueBoundAtBelief(successor.first)); |
|||
} |
|||
} |
|||
if (computeRewards) { |
|||
// The truncationValueBound will be added on top of the reward introduced by the current belief state.
|
|||
overApproximation->addTransitionsToExtraStates(action, truncationProbability); |
|||
overApproximation->computeRewardAtCurrentState(action, truncationValueBound); |
|||
} else { |
|||
overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); |
|||
} |
|||
} |
|||
} else { |
|||
// Case 3.2
|
|||
overApproximation->restoreOldBehaviorAtCurrentState(action); |
|||
} |
|||
} |
|||
if (expandedAtLeastOneAction) { |
|||
++numRewiredOrExploredStates; |
|||
} |
|||
} |
|||
|
|||
if (storm::utility::resources::isTerminate()) { |
|||
break; |
|||
} |
|||
} |
|||
|
|||
if (storm::utility::resources::isTerminate()) { |
|||
// don't overwrite statistics of a previous, successful computation
|
|||
if (!statistics.overApproximationStates) { |
|||
statistics.overApproximationBuildAborted = true; |
|||
statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); |
|||
} |
|||
statistics.overApproximationBuildTime.stop(); |
|||
return false; |
|||
} |
|||
|
|||
overApproximation->finishExploration(); |
|||
statistics.overApproximationBuildTime.stop(); |
|||
|
|||
statistics.overApproximationCheckTime.start(); |
|||
overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); |
|||
statistics.overApproximationCheckTime.stop(); |
|||
|
|||
// don't overwrite statistics of a previous, successful computation
|
|||
if (!storm::utility::resources::isTerminate() || !statistics.overApproximationStates) { |
|||
statistics.overApproximationStates = overApproximation->getExploredMdp()->getNumberOfStates(); |
|||
} |
|||
return fixPoint; |
|||
} |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType> |
|||
bool ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) { |
|||
statistics.underApproximationBuildTime.start(); |
|||
bool fixPoint = true; |
|||
if (heuristicParameters.sizeThreshold != std::numeric_limits<uint64_t>::max()) { |
|||
statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold; |
|||
} |
|||
if (!refine) { |
|||
// Build a new under approximation
|
|||
if (computeRewards) { |
|||
underApproximation->startNewExploration(storm::utility::zero<ValueType>()); |
|||
} else { |
|||
underApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>()); |
|||
} |
|||
} else { |
|||
// Restart the building process
|
|||
underApproximation->restartExploration(); |
|||
} |
|||
|
|||
// Expand the beliefs
|
|||
storm::utility::Stopwatch explorationTime; |
|||
if (options.explorationTimeLimit) { |
|||
explorationTime.start(); |
|||
} |
|||
bool timeLimitExceeded = false; |
|||
while (underApproximation->hasUnexploredState()) { |
|||
if (!timeLimitExceeded && options.explorationTimeLimit && static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) { |
|||
STORM_LOG_INFO("Exploration time limit exceeded."); |
|||
timeLimitExceeded = true; |
|||
} |
|||
uint64_t currId = underApproximation->exploreNextState(); |
|||
|
|||
uint32_t currObservation = beliefManager->getBeliefObservation(currId); |
|||
bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated(); |
|||
if (!stateAlreadyExplored || timeLimitExceeded) { |
|||
fixPoint = false; |
|||
} |
|||
if (targetObservations.count(currObservation) != 0) { |
|||
underApproximation->setCurrentStateIsTarget(); |
|||
underApproximation->addSelfloopTransition(); |
|||
} else { |
|||
bool stopExploration = false; |
|||
if (timeLimitExceeded) { |
|||
stopExploration = true; |
|||
underApproximation->setCurrentStateIsTruncated(); |
|||
} else if (!stateAlreadyExplored) { |
|||
// Check whether we want to explore the state now!
|
|||
ValueType gap = getGap(underApproximation->getLowerValueBoundAtCurrentState(), underApproximation->getUpperValueBoundAtCurrentState()); |
|||
if (gap < heuristicParameters.gapThreshold) { |
|||
stopExploration = true; |
|||
underApproximation->setCurrentStateIsTruncated(); |
|||
} else if (underApproximation->getCurrentNumberOfMdpStates() >= heuristicParameters.sizeThreshold) { |
|||
stopExploration = true; |
|||
underApproximation->setCurrentStateIsTruncated(); |
|||
} |
|||
} |
|||
for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { |
|||
// Always restore old behavior if available
|
|||
if (stateAlreadyExplored) { |
|||
underApproximation->restoreOldBehaviorAtCurrentState(action); |
|||
} else { |
|||
ValueType truncationProbability = storm::utility::zero<ValueType>(); |
|||
ValueType truncationValueBound = storm::utility::zero<ValueType>(); |
|||
auto successors = beliefManager->expand(currId, action); |
|||
for (auto const& successor : successors) { |
|||
bool added = underApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration); |
|||
if (!added) { |
|||
STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); |
|||
// We did not explore this successor state. Get a bound on the "missing" value
|
|||
truncationProbability += successor.second; |
|||
// Some care has to be taken here: Essentially, we are triangulating a value for the under-approximation out of other
|
|||
// under-approximation values. In general, this does not yield a sound underapproximation anymore.
|
|||
// However, in our case this is still the case as the under-approximation values are based on a memoryless scheduler.
|
|||
truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first)); |
|||
} |
|||
} |
|||
if (stopExploration) { |
|||
if (computeRewards) { |
|||
underApproximation->addTransitionsToExtraStates(action, truncationProbability); |
|||
} else { |
|||
underApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); |
|||
} |
|||
} |
|||
if (computeRewards) { |
|||
// The truncationValueBound will be added on top of the reward introduced by the current belief state.
|
|||
underApproximation->computeRewardAtCurrentState(action, truncationValueBound); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
if (storm::utility::resources::isTerminate()) { |
|||
break; |
|||
} |
|||
} |
|||
|
|||
if (storm::utility::resources::isTerminate()) { |
|||
// don't overwrite statistics of a previous, successful computation
|
|||
if (!statistics.underApproximationStates) { |
|||
statistics.underApproximationBuildAborted = true; |
|||
statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); |
|||
} |
|||
statistics.underApproximationBuildTime.stop(); |
|||
return false; |
|||
} |
|||
|
|||
underApproximation->finishExploration(); |
|||
statistics.underApproximationBuildTime.stop(); |
|||
|
|||
statistics.underApproximationCheckTime.start(); |
|||
underApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); |
|||
statistics.underApproximationCheckTime.stop(); |
|||
|
|||
// don't overwrite statistics of a previous, successful computation
|
|||
if (!storm::utility::resources::isTerminate() || !statistics.underApproximationStates) { |
|||
statistics.underApproximationStates = underApproximation->getExploredMdp()->getNumberOfStates(); |
|||
} |
|||
return fixPoint; |
|||
|
|||
} |
|||
|
|||
template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>; |
|||
template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<storm::RationalNumber>>; |
|||
|
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,121 @@ |
|||
#include "storm/api/storm.h" |
|||
#include "storm/models/sparse/Pomdp.h" |
|||
#include "storm/utility/logging.h" |
|||
#include "storm-pomdp/storage/Belief.h" |
|||
#include "storm-pomdp/storage/BeliefManager.h" |
|||
#include "storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h" |
|||
#include "storm-pomdp/builder/BeliefMdpExplorer.h" |
|||
|
|||
#include "storm/storage/jani/Property.h" |
|||
|
|||
namespace storm { |
|||
namespace logic { |
|||
class Formula; |
|||
} |
|||
|
|||
namespace pomdp { |
|||
namespace modelchecker { |
|||
|
|||
template<typename ValueType> |
|||
struct TrivialPomdpValueBounds; |
|||
|
|||
template<typename PomdpModelType, typename BeliefValueType = typename PomdpModelType::ValueType> |
|||
class ApproximatePOMDPModelchecker { |
|||
public: |
|||
typedef typename PomdpModelType::ValueType ValueType; |
|||
typedef typename PomdpModelType::RewardModelType RewardModelType; |
|||
typedef storm::storage::BeliefManager<PomdpModelType, BeliefValueType> BeliefManagerType; |
|||
typedef storm::builder::BeliefMdpExplorer<PomdpModelType, BeliefValueType> ExplorerType; |
|||
typedef ApproximatePOMDPModelCheckerOptions<ValueType> Options; |
|||
|
|||
struct Result { |
|||
Result(ValueType lower, ValueType upper); |
|||
ValueType lowerBound; |
|||
ValueType upperBound; |
|||
ValueType diff (bool relative = false) const; |
|||
bool updateLowerBound(ValueType const& value); |
|||
bool updateUpperBound(ValueType const& value); |
|||
}; |
|||
|
|||
ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options = Options()); |
|||
|
|||
Result check(storm::logic::Formula const& formula); |
|||
|
|||
void printStatisticsToStream(std::ostream& stream) const; |
|||
|
|||
private: |
|||
/** |
|||
* Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size |
|||
* |
|||
* @param targetObservations set of target observations |
|||
* @param min true if minimum value is to be computed |
|||
* @param computeRewards true if rewards are to be computed, false if probability is computed |
|||
* @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value |
|||
* @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value |
|||
* @param maxUaModelSize the maximum size of the underapproximation model to be generated |
|||
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values |
|||
*/ |
|||
void computeReachabilityOTF(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result); |
|||
|
|||
|
|||
/** |
|||
* Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop |
|||
* |
|||
* @param targetObservations the set of observations to be reached |
|||
* @param min true if minimum probability is to be computed |
|||
* @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values |
|||
*/ |
|||
void refineReachability(std::set<uint32_t> const &targetObservations, bool min, boost::optional<std::string> rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds<ValueType> const& pomdpValueBounds, Result& result); |
|||
|
|||
struct HeuristicParameters { |
|||
ValueType gapThreshold; |
|||
ValueType observationThreshold; |
|||
uint64_t sizeThreshold; |
|||
ValueType optimalChoiceValueEpsilon; |
|||
}; |
|||
|
|||
/** |
|||
* Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties |
|||
* Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp) |
|||
*/ |
|||
bool buildOverApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector<BeliefValueType>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation); |
|||
|
|||
/** |
|||
* Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties |
|||
* Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp) |
|||
*/ |
|||
bool buildUnderApproximation(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation); |
|||
|
|||
BeliefValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution); |
|||
|
|||
std::vector<BeliefValueType> getObservationRatings(std::shared_ptr<ExplorerType> const& overApproximation, std::vector<BeliefValueType> const& observationResolutionVector); |
|||
|
|||
struct Statistics { |
|||
Statistics(); |
|||
boost::optional<uint64_t> refinementSteps; |
|||
storm::utility::Stopwatch totalTime; |
|||
|
|||
boost::optional<uint64_t> overApproximationStates; |
|||
bool overApproximationBuildAborted; |
|||
storm::utility::Stopwatch overApproximationBuildTime; |
|||
storm::utility::Stopwatch overApproximationCheckTime; |
|||
boost::optional<BeliefValueType> overApproximationMaxResolution; |
|||
|
|||
boost::optional<uint64_t> underApproximationStates; |
|||
bool underApproximationBuildAborted; |
|||
storm::utility::Stopwatch underApproximationBuildTime; |
|||
storm::utility::Stopwatch underApproximationCheckTime; |
|||
boost::optional<uint64_t> underApproximationStateLimit; |
|||
|
|||
bool aborted; |
|||
}; |
|||
Statistics statistics; |
|||
|
|||
PomdpModelType const& pomdp; |
|||
Options options; |
|||
storm::utility::ConstantsComparator<ValueType> cc; |
|||
}; |
|||
|
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,207 @@ |
|||
#pragma once |
|||
|
|||
#include "storm-pomdp/analysis/FormulaInformation.h" |
|||
|
|||
#include "storm/api/verification.h" |
|||
#include "storm/models/sparse/Pomdp.h" |
|||
#include "storm/models/sparse/StandardRewardModel.h" |
|||
#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" |
|||
#include "storm/storage/Scheduler.h" |
|||
|
|||
#include "storm/utility/macros.h" |
|||
#include "storm/exceptions/UnexpectedException.h" |
|||
#include "storm/exceptions/NotSupportedException.h" |
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
namespace modelchecker { |
|||
|
|||
template<typename ValueType> |
|||
struct TrivialPomdpValueBounds { |
|||
std::vector<std::vector<ValueType>> lower; |
|||
std::vector<std::vector<ValueType>> upper; |
|||
ValueType getHighestLowerBound(uint64_t const& state) { |
|||
STORM_LOG_ASSERT(!lower.empty(), "requested a lower bound but none were available"); |
|||
auto it = lower.begin(); |
|||
ValueType result = (*it)[state]; |
|||
for (++it; it != lower.end(); ++it) { |
|||
result = std::max(result, (*it)[state]); |
|||
} |
|||
return result; |
|||
} |
|||
ValueType getSmallestUpperBound(uint64_t const& state) { |
|||
STORM_LOG_ASSERT(!upper.empty(), "requested an upper bound but none were available"); |
|||
auto it = upper.begin(); |
|||
ValueType result = (*it)[state]; |
|||
for (++it; it != upper.end(); ++it) { |
|||
result = std::min(result, (*it)[state]); |
|||
} |
|||
return result; |
|||
} |
|||
}; |
|||
|
|||
template <typename PomdpType> |
|||
class TrivialPomdpValueBoundsModelChecker { |
|||
public: |
|||
typedef typename PomdpType::ValueType ValueType; |
|||
typedef TrivialPomdpValueBounds<ValueType> ValueBounds; |
|||
TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) { |
|||
// Intentionally left empty |
|||
} |
|||
|
|||
ValueBounds getValueBounds(storm::logic::Formula const& formula) { |
|||
return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula)); |
|||
} |
|||
|
|||
std::vector<ValueType> getChoiceValues(std::vector<ValueType> const& stateValues, std::vector<ValueType>* actionBasedRewards) { |
|||
std::vector<ValueType> choiceValues((pomdp.getNumberOfChoices())); |
|||
pomdp.getTransitionMatrix().multiplyWithVector(stateValues, choiceValues, actionBasedRewards); |
|||
return choiceValues; |
|||
} |
|||
|
|||
std::vector<ValueType> computeValuesForGuessedScheduler(std::vector<ValueType> const& stateValues, std::vector<ValueType>* actionBasedRewards, storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info, std::shared_ptr<storm::models::sparse::Mdp<ValueType>> underlyingMdp, ValueType const& scoreThreshold, bool relativeScore) { |
|||
// Create some positional scheduler for the POMDP |
|||
storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates()); |
|||
// For each state, we heuristically find a good distribution over output actions. |
|||
auto choiceValues = getChoiceValues(stateValues, actionBasedRewards); |
|||
auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); |
|||
std::vector<storm::storage::Distribution<ValueType, uint_fast64_t>> choiceDistributions(pomdp.getNrObservations()); |
|||
for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { |
|||
auto& choiceDistribution = choiceDistributions[pomdp.getObservation(state)]; |
|||
ValueType const& stateValue = stateValues[state]; |
|||
assert(stateValue >= storm::utility::zero<ValueType>()); |
|||
for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) { |
|||
ValueType const& choiceValue = choiceValues[choice]; |
|||
assert(choiceValue >= storm::utility::zero<ValueType>()); |
|||
// Rate this choice by considering the relative difference between the choice value and the (optimal) state value |
|||
// A high score shall mean that the choice is "good" |
|||
if (storm::utility::isInfinity(stateValue)) { |
|||
// For infinity states, we simply distribute uniformly. |
|||
// FIXME: This case could be handled a bit more sensible |
|||
choiceDistribution.addProbability(choice - choiceIndices[state], scoreThreshold); |
|||
} else { |
|||
ValueType choiceScore = info.minimize() ? (choiceValue - stateValue) : (stateValue - choiceValue); |
|||
if (relativeScore) { |
|||
ValueType avg = (stateValue + choiceValue) / storm::utility::convertNumber<ValueType, uint64_t>(2); |
|||
if (!storm::utility::isZero(avg)) { |
|||
choiceScore /= avg; |
|||
} |
|||
} |
|||
choiceScore = storm::utility::one<ValueType>() - choiceScore; |
|||
if (choiceScore >= scoreThreshold) { |
|||
choiceDistribution.addProbability(choice - choiceIndices[state], choiceScore); |
|||
} |
|||
} |
|||
|
|||
} |
|||
STORM_LOG_ASSERT(choiceDistribution.size() > 0, "Empty choice distribution."); |
|||
} |
|||
// Normalize all distributions |
|||
for (auto& choiceDistribution : choiceDistributions) { |
|||
choiceDistribution.normalize(); |
|||
} |
|||
// Set the scheduler for all states |
|||
for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { |
|||
pomdpScheduler.setChoice(choiceDistributions[pomdp.getObservation(state)], state); |
|||
} |
|||
STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler."); |
|||
auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false); |
|||
|
|||
auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false)); |
|||
STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained."); |
|||
STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); |
|||
std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector()); |
|||
return pomdpSchedulerResult; |
|||
} |
|||
|
|||
ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) { |
|||
STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis."); |
|||
|
|||
// Compute the values on the fully observable MDP |
|||
// We need an actual MDP so that we can apply schedulers below. |
|||
// Also, the api call in the next line will require a copy anyway. |
|||
auto underlyingMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels()); |
|||
auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(underlyingMdp, storm::api::createTask<ValueType>(formula.asSharedPointer(), false)); |
|||
STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained."); |
|||
STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); |
|||
std::vector<ValueType> fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector()); |
|||
|
|||
std::vector<ValueType> actionBasedRewards; |
|||
std::vector<ValueType>* actionBasedRewardsPtr = nullptr; |
|||
if (info.isNonNestedExpectedRewardFormula()) { |
|||
actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix()); |
|||
actionBasedRewardsPtr = &actionBasedRewards; |
|||
} |
|||
std::vector<std::vector<ValueType>> guessedSchedulerValues; |
|||
|
|||
std::vector<std::pair<double, bool>> guessParameters({{0.875,false},{0.875,true},{0.75,false},{0.75,true}}); |
|||
for (auto const& pars : guessParameters) { |
|||
guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(fullyObservableResult, actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(pars.first), pars.second)); |
|||
} |
|||
|
|||
// compute the 'best' guess and do a few iterations on it |
|||
uint64_t bestGuess = 0; |
|||
ValueType bestGuessSum = std::accumulate(guessedSchedulerValues.front().begin(), guessedSchedulerValues.front().end(), storm::utility::zero<ValueType>()); |
|||
for (uint64_t guess = 1; guess < guessedSchedulerValues.size(); ++guess) { |
|||
ValueType guessSum = std::accumulate(guessedSchedulerValues[guess].begin(), guessedSchedulerValues[guess].end(), storm::utility::zero<ValueType>()); |
|||
if ((info.minimize() && guessSum < bestGuessSum) || (info.maximize() && guessSum > bestGuessSum)) { |
|||
bestGuess = guess; |
|||
bestGuessSum = guessSum; |
|||
} |
|||
} |
|||
guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues[bestGuess], actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first), guessParameters[bestGuess].second)); |
|||
guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues.back(), actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first), guessParameters[bestGuess].second)); |
|||
guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues.back(), actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first), guessParameters[bestGuess].second)); |
|||
|
|||
// Check if one of the guesses is worse than one of the others (and potentially delete it) |
|||
// Avoid deleting entries during the loop to ensure that indices remain valid |
|||
storm::storage::BitVector keptGuesses(guessedSchedulerValues.size(), true); |
|||
for (uint64_t i = 0; i < guessedSchedulerValues.size() - 1; ++i) { |
|||
if (!keptGuesses.get(i)) { |
|||
continue; |
|||
} |
|||
for (uint64_t j = i + 1; j < guessedSchedulerValues.size(); ++j) { |
|||
if (!keptGuesses.get(j)) { |
|||
continue; |
|||
} |
|||
if (storm::utility::vector::compareElementWise(guessedSchedulerValues[i], guessedSchedulerValues[j], std::less_equal<ValueType>())) { |
|||
if (info.minimize()) { |
|||
// In this case we are guessing upper bounds (and smaller upper bounds are better) |
|||
keptGuesses.set(j, false); |
|||
} else { |
|||
// In this case we are guessing lower bounds (and larger lower bounds are better) |
|||
keptGuesses.set(i, false); |
|||
break; |
|||
} |
|||
} else if (storm::utility::vector::compareElementWise(guessedSchedulerValues[j], guessedSchedulerValues[i], std::less_equal<ValueType>())) { |
|||
if (info.minimize()) { |
|||
keptGuesses.set(i, false); |
|||
break; |
|||
} else { |
|||
keptGuesses.set(j, false); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
std::cout << "Keeping scheduler guesses " << keptGuesses << std::endl; |
|||
storm::utility::vector::filterVectorInPlace(guessedSchedulerValues, keptGuesses); |
|||
|
|||
// Finally prepare the result |
|||
ValueBounds result; |
|||
if (info.minimize()) { |
|||
result.lower.push_back(std::move(fullyObservableResult)); |
|||
result.upper = std::move(guessedSchedulerValues); |
|||
} else { |
|||
result.lower = std::move(guessedSchedulerValues); |
|||
result.upper.push_back(std::move(fullyObservableResult)); |
|||
} |
|||
STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower.front(), result.upper.front(), std::less_equal<ValueType>()), "Lower bound is larger than upper bound"); |
|||
return result; |
|||
} |
|||
|
|||
private: |
|||
PomdpType const& pomdp; |
|||
}; |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,12 @@ |
|||
namespace storm { |
|||
namespace pomdp { |
|||
// Structure used to represent a belief |
|||
template<typename ValueType> |
|||
struct Belief { |
|||
uint64_t id; |
|||
uint32_t observation; |
|||
//TODO make this sparse? |
|||
std::map<uint64_t, ValueType> probabilities; |
|||
}; |
|||
} |
|||
} |
|||
@ -0,0 +1,507 @@ |
|||
#pragma once |
|||
|
|||
#include <unordered_map> |
|||
#include <boost/optional.hpp> |
|||
#include <boost/container/flat_map.hpp> |
|||
#include <boost/container/flat_set.hpp> |
|||
#include "storm/adapters/RationalNumberAdapter.h" |
|||
#include "storm/utility/macros.h" |
|||
#include "storm/exceptions/UnexpectedException.h" |
|||
|
|||
namespace storm { |
|||
namespace storage { |
|||
|
|||
template <typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType, typename StateType = uint64_t> |
|||
class BeliefManager { |
|||
public: |
|||
|
|||
typedef typename PomdpType::ValueType ValueType; |
|||
typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType; // iterating over this shall be ordered (for correct hash computation) |
|||
typedef boost::container::flat_set<StateType> BeliefSupportType; |
|||
typedef uint64_t BeliefId; |
|||
|
|||
enum class TriangulationMode { |
|||
Static, |
|||
Dynamic |
|||
}; |
|||
|
|||
BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision, TriangulationMode const& triangulationMode) : pomdp(pomdp), cc(precision, false), triangulationMode(triangulationMode) { |
|||
beliefToIdMap.resize(pomdp.getNrObservations()); |
|||
initialBeliefId = computeInitialBelief(); |
|||
} |
|||
|
|||
void setRewardModel(boost::optional<std::string> rewardModelName = boost::none) { |
|||
if (rewardModelName) { |
|||
auto const& rewardModel = pomdp.getRewardModel(rewardModelName.get()); |
|||
pomdpActionRewardVector = rewardModel.getTotalRewardVector(pomdp.getTransitionMatrix()); |
|||
} else { |
|||
setRewardModel(pomdp.getUniqueRewardModelName()); |
|||
} |
|||
} |
|||
|
|||
void unsetRewardModel() { |
|||
pomdpActionRewardVector.clear(); |
|||
} |
|||
|
|||
struct Triangulation { |
|||
std::vector<BeliefId> gridPoints; |
|||
std::vector<BeliefValueType> weights; |
|||
uint64_t size() const { |
|||
return weights.size(); |
|||
} |
|||
}; |
|||
|
|||
BeliefId noId() const { |
|||
return std::numeric_limits<BeliefId>::max(); |
|||
} |
|||
|
|||
bool isEqual(BeliefId const& first, BeliefId const& second) const { |
|||
return isEqual(getBelief(first), getBelief(second)); |
|||
} |
|||
|
|||
std::string toString(BeliefId const& beliefId) const { |
|||
return toString(getBelief(beliefId)); |
|||
} |
|||
|
|||
|
|||
std::string toString(Triangulation const& t) const { |
|||
std::stringstream str; |
|||
str << "(\n"; |
|||
for (uint64_t i = 0; i < t.size(); ++i) { |
|||
str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n"; |
|||
} |
|||
str <<")\n"; |
|||
return str.str(); |
|||
} |
|||
|
|||
template <typename SummandsType> |
|||
ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) { |
|||
ValueType result = storm::utility::zero<ValueType>(); |
|||
for (auto const& entry : getBelief(beliefId)) { |
|||
result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first)); |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
|
|||
BeliefId const& getInitialBelief() const { |
|||
return initialBeliefId; |
|||
} |
|||
|
|||
ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const { |
|||
auto const& belief = getBelief(beliefId); |
|||
STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified."); |
|||
auto result = storm::utility::zero<ValueType>(); |
|||
auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); |
|||
for (auto const &entry : belief) { |
|||
uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex; |
|||
STORM_LOG_ASSERT(choiceIndex < choiceIndices[entry.first + 1], "Invalid local action index."); |
|||
STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index."); |
|||
result += entry.second * pomdpActionRewardVector[choiceIndex]; |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
uint32_t getBeliefObservation(BeliefId beliefId) { |
|||
return getBeliefObservation(getBelief(beliefId)); |
|||
} |
|||
|
|||
uint64_t getBeliefNumberOfChoices(BeliefId beliefId) { |
|||
auto const& belief = getBelief(beliefId); |
|||
return pomdp.getNumberOfChoices(belief.begin()->first); |
|||
} |
|||
|
|||
Triangulation triangulateBelief(BeliefId beliefId, BeliefValueType resolution) { |
|||
return triangulateBelief(getBelief(beliefId), resolution); |
|||
} |
|||
|
|||
template<typename DistributionType> |
|||
void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { |
|||
auto insertionRes = distr.emplace(state, value); |
|||
if (!insertionRes.second) { |
|||
insertionRes.first->second += value; |
|||
} |
|||
} |
|||
|
|||
void joinSupport(BeliefId const& beliefId, BeliefSupportType& support) { |
|||
auto const& belief = getBelief(beliefId); |
|||
for (auto const& entry : belief) { |
|||
support.insert(entry.first); |
|||
} |
|||
} |
|||
|
|||
BeliefId getNumberOfBeliefIds() const { |
|||
return beliefs.size(); |
|||
} |
|||
|
|||
std::vector<std::pair<BeliefId, ValueType>> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<BeliefValueType> const& observationResolutions) { |
|||
return expandInternal(beliefId, actionIndex, observationResolutions); |
|||
} |
|||
|
|||
std::vector<std::pair<BeliefId, ValueType>> expand(BeliefId const& beliefId, uint64_t actionIndex) { |
|||
return expandInternal(beliefId, actionIndex); |
|||
} |
|||
|
|||
private: |
|||
|
|||
BeliefType const& getBelief(BeliefId const& id) const { |
|||
STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existend belief."); |
|||
STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range."); |
|||
return beliefs[id]; |
|||
} |
|||
|
|||
BeliefId getId(BeliefType const& belief) const { |
|||
uint32_t obs = getBeliefObservation(belief); |
|||
STORM_LOG_ASSERT(obs < beliefToIdMap.size(), "Belief has unknown observation."); |
|||
auto idIt = beliefToIdMap[obs].find(belief); |
|||
STORM_LOG_ASSERT(idIt != beliefToIdMap.end(), "Unknown Belief."); |
|||
return idIt->second; |
|||
} |
|||
|
|||
std::string toString(BeliefType const& belief) const { |
|||
std::stringstream str; |
|||
str << "{ "; |
|||
bool first = true; |
|||
for (auto const& entry : belief) { |
|||
if (first) { |
|||
first = false; |
|||
} else { |
|||
str << ", "; |
|||
} |
|||
str << entry.first << ": " << entry.second; |
|||
} |
|||
str << " }"; |
|||
return str.str(); |
|||
} |
|||
|
|||
bool isEqual(BeliefType const& first, BeliefType const& second) const { |
|||
if (first.size() != second.size()) { |
|||
return false; |
|||
} |
|||
auto secondIt = second.begin(); |
|||
for (auto const& firstEntry : first) { |
|||
if (firstEntry.first != secondIt->first) { |
|||
return false; |
|||
} |
|||
if (!cc.isEqual(firstEntry.second, secondIt->second)) { |
|||
return false; |
|||
} |
|||
++secondIt; |
|||
} |
|||
return true; |
|||
} |
|||
|
|||
bool assertBelief(BeliefType const& belief) const { |
|||
BeliefValueType sum = storm::utility::zero<ValueType>(); |
|||
boost::optional<uint32_t> observation; |
|||
for (auto const& entry : belief) { |
|||
if (entry.first >= pomdp.getNumberOfStates()) { |
|||
STORM_LOG_ERROR("Belief does refer to non-existing pomdp state " << entry.first << "."); |
|||
return false; |
|||
} |
|||
uint64_t entryObservation = pomdp.getObservation(entry.first); |
|||
if (observation) { |
|||
if (observation.get() != entryObservation) { |
|||
STORM_LOG_ERROR("Beliefsupport contains different observations."); |
|||
return false; |
|||
} |
|||
} else { |
|||
observation = entryObservation; |
|||
} |
|||
// Don't use cc for these checks, because computations with zero are usually fine |
|||
if (storm::utility::isZero(entry.second)) { |
|||
// We assume that beliefs only consider their support. |
|||
STORM_LOG_ERROR("Zero belief probability."); |
|||
return false; |
|||
} |
|||
if (entry.second < storm::utility::zero<BeliefValueType>()) { |
|||
STORM_LOG_ERROR("Negative belief probability."); |
|||
return false; |
|||
} |
|||
if (cc.isLess(storm::utility::one<BeliefValueType>(), entry.second)) { |
|||
STORM_LOG_ERROR("Belief probability greater than one."); |
|||
return false; |
|||
} |
|||
sum += entry.second; |
|||
} |
|||
if (!cc.isOne(sum)) { |
|||
STORM_LOG_ERROR("Belief does not sum up to one. (" << sum << " instead)."); |
|||
return false; |
|||
} |
|||
return true; |
|||
} |
|||
|
|||
bool assertTriangulation(BeliefType const& belief, Triangulation const& triangulation) const { |
|||
if (triangulation.weights.size() != triangulation.gridPoints.size()) { |
|||
STORM_LOG_ERROR("Number of weights and points in triangulation does not match."); |
|||
return false; |
|||
} |
|||
if (triangulation.size() == 0) { |
|||
STORM_LOG_ERROR("Empty triangulation."); |
|||
return false; |
|||
} |
|||
BeliefType triangulatedBelief; |
|||
BeliefValueType weightSum = storm::utility::zero<BeliefValueType>(); |
|||
for (uint64_t i = 0; i < triangulation.weights.size(); ++i) { |
|||
if (cc.isZero(triangulation.weights[i])) { |
|||
STORM_LOG_ERROR("Zero weight in triangulation."); |
|||
return false; |
|||
} |
|||
if (cc.isLess(triangulation.weights[i], storm::utility::zero<BeliefValueType>())) { |
|||
STORM_LOG_ERROR("Negative weight in triangulation."); |
|||
return false; |
|||
} |
|||
if (cc.isLess(storm::utility::one<BeliefValueType>(), triangulation.weights[i])) { |
|||
STORM_LOG_ERROR("Weight greater than one in triangulation."); |
|||
} |
|||
weightSum += triangulation.weights[i]; |
|||
BeliefType const& gridPoint = getBelief(triangulation.gridPoints[i]); |
|||
for (auto const& pointEntry : gridPoint) { |
|||
BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero<ValueType>()).first->second; |
|||
triangulatedValue += triangulation.weights[i] * pointEntry.second; |
|||
} |
|||
} |
|||
if (!cc.isOne(weightSum)) { |
|||
STORM_LOG_ERROR("Triangulation weights do not sum up to one."); |
|||
return false; |
|||
} |
|||
if (!assertBelief(triangulatedBelief)) { |
|||
STORM_LOG_ERROR("Triangulated belief is not a belief."); |
|||
} |
|||
if (!isEqual(belief, triangulatedBelief)) { |
|||
STORM_LOG_ERROR("Belief:\n\t" << toString(belief) << "\ndoes not match triangulated belief:\n\t" << toString(triangulatedBelief) << "."); |
|||
return false; |
|||
} |
|||
return true; |
|||
} |
|||
|
|||
uint32_t getBeliefObservation(BeliefType belief) { |
|||
STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief."); |
|||
return pomdp.getObservation(belief.begin()->first); |
|||
} |
|||
|
|||
struct FreudenthalDiff { |
|||
FreudenthalDiff(StateType const& dimension, BeliefValueType&& diff) : dimension(dimension), diff(std::move(diff)) { }; |
|||
StateType dimension; // i |
|||
BeliefValueType diff; // d[i] |
|||
bool operator>(FreudenthalDiff const& other) const { |
|||
if (diff != other.diff) { |
|||
return diff > other.diff; |
|||
} else { |
|||
return dimension < other.dimension; |
|||
} |
|||
} |
|||
}; |
|||
|
|||
void triangulateBeliefFreudenthal(BeliefType const& belief, BeliefValueType const& resolution, Triangulation& result) { |
|||
STORM_LOG_ASSERT(resolution != 0, "Invalid resolution: 0"); |
|||
STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution"); |
|||
StateType numEntries = belief.size(); |
|||
// This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) |
|||
// Probabilities will be triangulated to values in 0/N, 1/N, 2/N, ..., N/N |
|||
// Variable names are mostly based on the paper |
|||
// However, we speed this up a little by exploiting that belief states usually have sparse support (i.e. numEntries is much smaller than pomdp.getNumberOfStates()). |
|||
// Initialize diffs and the first row of the 'qs' matrix (aka v) |
|||
std::set<FreudenthalDiff, std::greater<FreudenthalDiff>> sorted_diffs; // d (and p?) in the paper |
|||
std::vector<BeliefValueType> qsRow; // Row of the 'qs' matrix from the paper (initially corresponds to v |
|||
qsRow.reserve(numEntries); |
|||
std::vector<StateType> toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices |
|||
toOriginalIndicesMap.reserve(numEntries); |
|||
BeliefValueType x = resolution; |
|||
for (auto const& entry : belief) { |
|||
qsRow.push_back(storm::utility::floor(x)); // v |
|||
sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v |
|||
toOriginalIndicesMap.push_back(entry.first); |
|||
x -= entry.second * resolution; |
|||
} |
|||
// Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler |
|||
qsRow.push_back(storm::utility::zero<BeliefValueType>()); |
|||
|
|||
result.weights.reserve(numEntries); |
|||
result.gridPoints.reserve(numEntries); |
|||
auto currentSortedDiff = sorted_diffs.begin(); |
|||
auto previousSortedDiff = sorted_diffs.end(); |
|||
--previousSortedDiff; |
|||
for (StateType i = 0; i < numEntries; ++i) { |
|||
// Compute the weight for the grid points |
|||
BeliefValueType weight = previousSortedDiff->diff - currentSortedDiff->diff; |
|||
if (i == 0) { |
|||
// The first weight is a bit different |
|||
weight += storm::utility::one<ValueType>(); |
|||
} else { |
|||
// 'compute' the next row of the qs matrix |
|||
qsRow[previousSortedDiff->dimension] += storm::utility::one<BeliefValueType>(); |
|||
} |
|||
if (!cc.isZero(weight)) { |
|||
result.weights.push_back(weight); |
|||
// Compute the grid point |
|||
BeliefType gridPoint; |
|||
for (StateType j = 0; j < numEntries; ++j) { |
|||
BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; |
|||
if (!cc.isZero(gridPointEntry)) { |
|||
gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / resolution; |
|||
} |
|||
} |
|||
result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); |
|||
} |
|||
previousSortedDiff = currentSortedDiff++; |
|||
} |
|||
} |
|||
|
|||
void triangulateBeliefDynamic(BeliefType const& belief, BeliefValueType const& resolution, Triangulation& result) { |
|||
// Find the best resolution for this belief, i.e., N such that the largest distance between one of the belief values to a value in {i/N | 0 ≤ i ≤ N} is minimal |
|||
STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution"); |
|||
BeliefValueType finalResolution = resolution; |
|||
uint64_t finalResolutionMisses = belief.size() + 1; |
|||
// We don't need to check resolutions that are smaller than the maximal resolution divided by 2 (as we already checked multiples of these) |
|||
for (BeliefValueType currResolution = resolution; currResolution > resolution / 2; --currResolution) { |
|||
uint64_t currResMisses = 0; |
|||
bool continueWithNextResolution = false; |
|||
for (auto const& belEntry : belief) { |
|||
BeliefValueType product = belEntry.second * currResolution; |
|||
if (!cc.isZero(product - storm::utility::round(product))) { |
|||
++currResMisses; |
|||
if (currResMisses >= finalResolutionMisses) { |
|||
// This resolution is not better than a previous resolution |
|||
continueWithNextResolution = true; |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
if (!continueWithNextResolution) { |
|||
STORM_LOG_ASSERT(currResMisses < finalResolutionMisses, "Distance for this resolution should not be larger than a previously checked one."); |
|||
finalResolution = currResolution; |
|||
finalResolutionMisses = currResMisses; |
|||
if (currResMisses == 0) { |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
|
|||
STORM_LOG_TRACE("Picking resolution " << finalResolution << " for belief " << toString(belief)); |
|||
|
|||
// do standard freudenthal with the found resolution |
|||
triangulateBeliefFreudenthal(belief, finalResolution, result); |
|||
} |
|||
|
|||
Triangulation triangulateBelief(BeliefType const& belief, BeliefValueType const& resolution) { |
|||
STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); |
|||
Triangulation result; |
|||
// Quickly triangulate Dirac beliefs |
|||
if (belief.size() == 1u) { |
|||
result.weights.push_back(storm::utility::one<BeliefValueType>()); |
|||
result.gridPoints.push_back(getOrAddBeliefId(belief)); |
|||
} else { |
|||
auto ceiledResolution = storm::utility::ceil<BeliefValueType>(resolution); |
|||
switch (triangulationMode) { |
|||
case TriangulationMode::Static: |
|||
triangulateBeliefFreudenthal(belief, ceiledResolution, result); |
|||
break; |
|||
case TriangulationMode::Dynamic: |
|||
triangulateBeliefDynamic(belief, ceiledResolution, result); |
|||
break; |
|||
default: |
|||
STORM_LOG_ASSERT(false, "Invalid triangulation mode."); |
|||
} |
|||
} |
|||
STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation: " << toString(result)); |
|||
return result; |
|||
} |
|||
|
|||
std::vector<std::pair<BeliefId, ValueType>> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<BeliefValueType>> const& observationTriangulationResolutions = boost::none) { |
|||
std::vector<std::pair<BeliefId, ValueType>> destinations; |
|||
|
|||
BeliefType belief = getBelief(beliefId); |
|||
|
|||
// Find the probability we go to each observation |
|||
BeliefType successorObs; // This is actually not a belief but has the same type |
|||
for (auto const& pointEntry : belief) { |
|||
uint64_t state = pointEntry.first; |
|||
for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { |
|||
if (!storm::utility::isZero(pomdpTransition.getValue())) { |
|||
auto obs = pomdp.getObservation(pomdpTransition.getColumn()); |
|||
addToDistribution(successorObs, obs, pointEntry.second * pomdpTransition.getValue()); |
|||
} |
|||
} |
|||
} |
|||
|
|||
// Now for each successor observation we find and potentially triangulate the successor belief |
|||
for (auto const& successor : successorObs) { |
|||
BeliefType successorBelief; |
|||
for (auto const& pointEntry : belief) { |
|||
uint64_t state = pointEntry.first; |
|||
for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { |
|||
if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) { |
|||
ValueType prob = pointEntry.second * pomdpTransition.getValue() / successor.second; |
|||
addToDistribution(successorBelief, pomdpTransition.getColumn(), prob); |
|||
} |
|||
} |
|||
} |
|||
STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief."); |
|||
|
|||
// Insert the destination. We know that destinations have to be disjoined since they have different observations |
|||
if (observationTriangulationResolutions) { |
|||
Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.get()[successor.first]); |
|||
for (size_t j = 0; j < triangulation.size(); ++j) { |
|||
// Here we additionally assume that triangulation.gridPoints does not contain the same point multiple times |
|||
destinations.emplace_back(triangulation.gridPoints[j], triangulation.weights[j] * successor.second); |
|||
} |
|||
} else { |
|||
destinations.emplace_back(getOrAddBeliefId(successorBelief), successor.second); |
|||
} |
|||
} |
|||
|
|||
return destinations; |
|||
|
|||
} |
|||
|
|||
BeliefId computeInitialBelief() { |
|||
STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, |
|||
"POMDP contains more than one initial state"); |
|||
STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, |
|||
"POMDP does not contain an initial state"); |
|||
BeliefType belief; |
|||
belief[*pomdp.getInitialStates().begin()] = storm::utility::one<ValueType>(); |
|||
|
|||
STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief."); |
|||
return getOrAddBeliefId(belief); |
|||
} |
|||
|
|||
BeliefId getOrAddBeliefId(BeliefType const& belief) { |
|||
uint32_t obs = getBeliefObservation(belief); |
|||
STORM_LOG_ASSERT(obs < beliefToIdMap.size(), "Belief has unknown observation."); |
|||
auto insertioRes = beliefToIdMap[obs].emplace(belief, beliefs.size()); |
|||
if (insertioRes.second) { |
|||
// There actually was an insertion, so add the new belief |
|||
beliefs.push_back(belief); |
|||
} |
|||
// Return the id |
|||
return insertioRes.first->second; |
|||
} |
|||
|
|||
struct BeliefHash { |
|||
std::size_t operator()(const BeliefType& belief) const { |
|||
std::size_t seed = 0; |
|||
// Assumes that beliefs are ordered |
|||
for (auto const& entry : belief) { |
|||
boost::hash_combine(seed, entry.first); |
|||
boost::hash_combine(seed, entry.second); |
|||
} |
|||
return seed; |
|||
} |
|||
}; |
|||
|
|||
PomdpType const& pomdp; |
|||
std::vector<ValueType> pomdpActionRewardVector; |
|||
|
|||
std::vector<BeliefType> beliefs; |
|||
std::vector<std::unordered_map<BeliefType, BeliefId, BeliefHash>> beliefToIdMap; |
|||
BeliefId initialBeliefId; |
|||
|
|||
storm::utility::ConstantsComparator<ValueType> cc; |
|||
|
|||
TriangulationMode triangulationMode; |
|||
|
|||
}; |
|||
} |
|||
} |
|||
@ -0,0 +1,123 @@ |
|||
#include "KnownProbabilityTransformer.h"
|
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
namespace transformer { |
|||
|
|||
template<typename ValueType> |
|||
KnownProbabilityTransformer<ValueType>::KnownProbabilityTransformer() { |
|||
// Intentionally left empty
|
|||
} |
|||
|
|||
template<typename ValueType> |
|||
std::shared_ptr<storm::models::sparse::Pomdp<ValueType>> |
|||
KnownProbabilityTransformer<ValueType>::transform(storm::models::sparse::Pomdp<ValueType> const &pomdp, storm::storage::BitVector &prob0States, |
|||
storm::storage::BitVector &prob1States) { |
|||
std::map<uint64_t, uint64_t> stateMap; |
|||
std::map<uint32_t, uint32_t> observationMap; |
|||
|
|||
uint64_t nrNewStates = prob0States.empty() ? 1 : 2; |
|||
|
|||
storm::models::sparse::StateLabeling newLabeling(pomdp.getNumberOfStates() - prob0States.getNumberOfSetBits() - prob1States.getNumberOfSetBits() + nrNewStates); |
|||
|
|||
std::vector<uint32_t> newObservations; |
|||
|
|||
// New state 0 represents all states with probability 1
|
|||
for (auto const &iter : prob1States) { |
|||
stateMap[iter] = 0; |
|||
|
|||
std::set<std::string> labelSet = pomdp.getStateLabeling().getLabelsOfState(iter); |
|||
for (auto const &label : labelSet) { |
|||
if (!newLabeling.containsLabel(label)) { |
|||
newLabeling.addLabel(label); |
|||
} |
|||
newLabeling.addLabelToState(label, 0); |
|||
} |
|||
} |
|||
// New state 1 represents all states with probability 0
|
|||
for (auto const &iter : prob0States) { |
|||
stateMap[iter] = 1; |
|||
for (auto const &label : pomdp.getStateLabeling().getLabelsOfState(iter)) { |
|||
if (!newLabeling.containsLabel(label)) { |
|||
newLabeling.addLabel(label); |
|||
} |
|||
newLabeling.addLabelToState(label, 1); |
|||
} |
|||
} |
|||
|
|||
storm::storage::BitVector unknownStates = ~(prob1States | prob0States); |
|||
//If there are no states with probability 0 we set the next new state id to be 1, otherwise 2
|
|||
uint64_t newId = prob0States.empty() ? 1 : 2; |
|||
uint64_t nextObservation = prob0States.empty() ? 1 : 2; |
|||
for (auto const &iter : unknownStates) { |
|||
stateMap[iter] = newId; |
|||
if (observationMap.count(pomdp.getObservation(iter)) == 0) { |
|||
observationMap[pomdp.getObservation(iter)] = nextObservation; |
|||
++nextObservation; |
|||
} |
|||
for (auto const &label : pomdp.getStateLabeling().getLabelsOfState(iter)) { |
|||
if (!newLabeling.containsLabel(label)) { |
|||
newLabeling.addLabel(label); |
|||
} |
|||
newLabeling.addLabelToState(label, newId); |
|||
} |
|||
++newId; |
|||
} |
|||
|
|||
uint64_t newNrOfStates = pomdp.getNumberOfStates() - (prob1States.getNumberOfSetBits() + prob0States.getNumberOfSetBits()); |
|||
|
|||
uint64_t currentRow = 0; |
|||
uint64_t currentRowGroup = 0; |
|||
storm::storage::SparseMatrixBuilder<ValueType> smb(0, 0, 0, false, true); |
|||
//new row for prob 1 state
|
|||
smb.newRowGroup(currentRow); |
|||
smb.addNextValue(currentRow, 0, storm::utility::one<ValueType>()); |
|||
newObservations.push_back(0); |
|||
++currentRowGroup; |
|||
++currentRow; |
|||
if (!prob0States.empty()) { |
|||
smb.newRowGroup(currentRow); |
|||
smb.addNextValue(currentRow, 1, storm::utility::one<ValueType>()); |
|||
++currentRowGroup; |
|||
++currentRow; |
|||
newObservations.push_back(1); |
|||
} |
|||
|
|||
auto transitionMatrix = pomdp.getTransitionMatrix(); |
|||
|
|||
for (auto const &iter : unknownStates) { |
|||
smb.newRowGroup(currentRow); |
|||
// First collect all transitions
|
|||
//auto rowGroup = transitionMatrix.getRowGroup(iter);
|
|||
for (uint64_t row = 0; row < transitionMatrix.getRowGroupSize(iter); ++row) { |
|||
std::map<uint64_t, ValueType> transitionsInAction; |
|||
for (auto const &entry : transitionMatrix.getRow(iter, row)) { |
|||
// here we use the state mapping to collect all probabilities to get to a state with prob 0/1
|
|||
transitionsInAction[stateMap[entry.getColumn()]] += entry.getValue(); |
|||
} |
|||
for (auto const &transition : transitionsInAction) { |
|||
smb.addNextValue(currentRow, transition.first, transition.second); |
|||
} |
|||
++currentRow; |
|||
} |
|||
++currentRowGroup; |
|||
newObservations.push_back(observationMap[pomdp.getObservation(iter)]); |
|||
} |
|||
|
|||
auto newTransitionMatrix = smb.build(currentRow, newNrOfStates, currentRowGroup); |
|||
//STORM_PRINT(newTransitionMatrix)
|
|||
storm::storage::sparse::ModelComponents<ValueType> components(newTransitionMatrix, newLabeling); |
|||
components.observabilityClasses = newObservations; |
|||
|
|||
auto newPomdp = storm::models::sparse::Pomdp<ValueType>(components); |
|||
|
|||
newPomdp.printModelInformationToStream(std::cout); |
|||
|
|||
return std::make_shared<storm::models::sparse::Pomdp<ValueType>>(newPomdp); |
|||
} |
|||
|
|||
template class KnownProbabilityTransformer<double>; |
|||
template class KnownProbabilityTransformer<storm::RationalNumber>; |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,17 @@ |
|||
#include "storm/api/storm.h" |
|||
#include "storm/models/sparse/Pomdp.h" |
|||
|
|||
namespace storm { |
|||
namespace pomdp { |
|||
namespace transformer { |
|||
template<class ValueType> |
|||
class KnownProbabilityTransformer { |
|||
public: |
|||
KnownProbabilityTransformer(); |
|||
|
|||
std::shared_ptr<storm::models::sparse::Pomdp<ValueType>> |
|||
transform(storm::models::sparse::Pomdp<ValueType> const &pomdp, storm::storage::BitVector &prob0States, storm::storage::BitVector &prob1States); |
|||
}; |
|||
} |
|||
} |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue