diff --git a/src/storm-pomdp-cli/settings/PomdpSettings.cpp b/src/storm-pomdp-cli/settings/PomdpSettings.cpp new file mode 100644 index 000000000..73d725c09 --- /dev/null +++ b/src/storm-pomdp-cli/settings/PomdpSettings.cpp @@ -0,0 +1,71 @@ +#include "storm-pomdp-cli/settings/PomdpSettings.h" + +#include "storm/settings/SettingsManager.h" + +#include "storm/settings/modules/GeneralSettings.h" +#include "storm/settings/modules/CoreSettings.h" +#include "storm/settings/modules/IOSettings.h" +#include "storm/settings/modules/DebugSettings.h" +#include "storm/settings/modules/CuddSettings.h" +#include "storm/settings/modules/SylvanSettings.h" +#include "storm/settings/modules/EigenEquationSolverSettings.h" +#include "storm/settings/modules/GmmxxEquationSolverSettings.h" +#include "storm/settings/modules/NativeEquationSolverSettings.h" +#include "storm/settings/modules/EliminationSettings.h" +#include "storm/settings/modules/MinMaxEquationSolverSettings.h" +#include "storm/settings/modules/GameSolverSettings.h" +#include "storm/settings/modules/BisimulationSettings.h" +#include "storm/settings/modules/GlpkSettings.h" +#include "storm/settings/modules/GurobiSettings.h" +#include "storm/settings/modules/Smt2SmtSolverSettings.h" +#include "storm/settings/modules/ExplorationSettings.h" +#include "storm/settings/modules/ResourceSettings.h" +#include "storm/settings/modules/AbstractionSettings.h" +#include "storm/settings/modules/BuildSettings.h" +#include "storm/settings/modules/JitBuilderSettings.h" +#include "storm/settings/modules/TopologicalEquationSolverSettings.h" +#include "storm/settings/modules/ModelCheckerSettings.h" +#include "storm/settings/modules/MultiplierSettings.h" +#include "storm/settings/modules/TransformationSettings.h" +#include "storm/settings/modules/MultiObjectiveSettings.h" +#include "storm/settings/modules/HintSettings.h" +#include "storm/settings/modules/OviSolverSettings.h" + +#include "storm-pomdp-cli/settings/modules/POMDPSettings.h" +#include "storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h" + +namespace storm { + namespace settings { + void initializePomdpSettings(std::string const& name, std::string const& executableName) { + storm::settings::mutableManager().setName(name, executableName); + + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + + storm::settings::addModule(); + storm::settings::addModule(); + + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + storm::settings::addModule(); + } + } +} + diff --git a/src/storm-pomdp-cli/settings/PomdpSettings.h b/src/storm-pomdp-cli/settings/PomdpSettings.h new file mode 100644 index 000000000..f3788c598 --- /dev/null +++ b/src/storm-pomdp-cli/settings/PomdpSettings.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace storm { + namespace settings { + /*! + * Initialize the settings manager. + */ + void initializePomdpSettings(std::string const& name, std::string const& executableName); + + } +} \ No newline at end of file diff --git a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp new file mode 100644 index 000000000..dc42679aa --- /dev/null +++ b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.cpp @@ -0,0 +1,180 @@ +#include "storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h" + +#include "storm/settings/SettingsManager.h" +#include "storm/settings/SettingMemento.h" +#include "storm/settings/Option.h" +#include "storm/settings/OptionBuilder.h" +#include "storm/settings/ArgumentBuilder.h" + +#include "storm/utility/NumberTraits.h" +#include "storm/adapters/RationalNumberAdapter.h" +#include "storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h" + +#include "storm/exceptions/InvalidArgumentException.h" + + +namespace storm { + namespace settings { + namespace modules { + + const std::string BeliefExplorationSettings::moduleName = "belexpl"; + + const std::string refineOption = "refine"; + const std::string explorationTimeLimitOption = "exploration-time"; + const std::string resolutionOption = "resolution"; + const std::string sizeThresholdOption = "size-threshold"; + const std::string gapThresholdOption = "gap-threshold"; + const std::string schedulerThresholdOption = "scheduler-threshold"; + const std::string observationThresholdOption = "obs-threshold"; + const std::string numericPrecisionOption = "numeric-precision"; + const std::string triangulationModeOption = "triangulationmode"; + + BeliefExplorationSettings::BeliefExplorationSettings() : ModuleSettings(moduleName) { + + this->addOption(storm::settings::OptionBuilder(moduleName, refineOption, false,"Refines the result bounds until reaching either the goal precision or the refinement step limit").addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("prec","The goal precision.").setDefaultValueDouble(1e-4).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0.0)).build()).addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("steps","The number of allowed refinement steps (0 means no limit).").setDefaultValueUnsignedInteger(0).makeOptional().build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, explorationTimeLimitOption, false, "Sets after which time no further states shall be explored.").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("time","In seconds.").build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, resolutionOption, false,"Sets the resolution of the discretization and how it is increased in case of refinement").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("init","the initial resolution (higher means more precise)").setDefaultValueUnsignedInteger(3).addValidatorUnsignedInteger(storm::settings::ArgumentValidatorFactory::createUnsignedGreaterValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the resolution of refined observations (higher means more precise).").setDefaultValueDouble(2).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterValidator(1)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, observationThresholdOption, false,"Only observations whose score is below this threshold will be refined.").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("init","initial threshold (higher means more precise").setDefaultValueDouble(0.1).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Controlls how fast the threshold is increased in each refinement step (higher means more precise).").setDefaultValueDouble(0.1).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, sizeThresholdOption, false,"Sets how many new states are explored or rewired in a refinement step and how this value is increased in case of refinement.").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("init","initial limit (higher means more precise, 0 means automatic choice)").setDefaultValueUnsignedInteger(0).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Before each step the new threshold is set to the current state count times this number (higher means more precise).").setDefaultValueDouble(4).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(1)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, gapThresholdOption, false,"Sets how large the gap between known lower- and upper bounds at a beliefstate needs to be in order to explore").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("init","initial threshold (higher means less precise").setDefaultValueDouble(0.1).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the gap in each refinement step (higher means less precise).").setDefaultValueDouble(0.25).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0,1)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, schedulerThresholdOption, false,"Sets how much worse a sub-optimal choice can be in order to be included in the relevant explored fragment").setIsAdvanced().addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("init","initial threshold (higher means more precise").setDefaultValueDouble(1e-3).addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(0)).build()).addArgument(storm::settings::ArgumentBuilder::createDoubleArgument("factor","Multiplied to the threshold in each refinement step (higher means more precise).").setDefaultValueDouble(1).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleGreaterEqualValidator(1)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, numericPrecisionOption, false,"Sets the precision used to determine whether two belief-states are equal.").setIsAdvanced().addArgument( + storm::settings::ArgumentBuilder::createDoubleArgument("value","the precision").setDefaultValueDouble(1e-9).makeOptional().addValidatorDouble(storm::settings::ArgumentValidatorFactory::createDoubleRangeValidatorIncluding(0, 1)).build()).build()); + + this->addOption(storm::settings::OptionBuilder(moduleName, triangulationModeOption, false,"Sets how to triangulate beliefs when discretizing.").setIsAdvanced().addArgument( + storm::settings::ArgumentBuilder::createStringArgument("value","the triangulation mode").setDefaultValueString("dynamic").addValidatorString(storm::settings::ArgumentValidatorFactory::createMultipleChoiceValidator({"dynamic", "static"})).build()).build()); + } + + bool BeliefExplorationSettings::isRefineSet() const { + return this->getOption(refineOption).getHasOptionBeenSet(); + } + + double BeliefExplorationSettings::getRefinePrecision() const { + return this->getOption(refineOption).getArgumentByName("prec").getValueAsDouble(); + } + + bool BeliefExplorationSettings::isRefineStepLimitSet() const { + return this->getOption(refineOption).getArgumentByName("steps").getValueAsUnsignedInteger() != 0; + } + + uint64_t BeliefExplorationSettings::getRefineStepLimit() const { + assert(isRefineStepLimitSet()); + return this->getOption(refineOption).getArgumentByName("steps").getValueAsUnsignedInteger(); + } + + bool BeliefExplorationSettings::isExplorationTimeLimitSet() const { + return this->getOption(explorationTimeLimitOption).getHasOptionBeenSet(); + } + + uint64_t BeliefExplorationSettings::getExplorationTimeLimit() const { + return this->getOption(explorationTimeLimitOption).getArgumentByName("time").getValueAsUnsignedInteger(); + } + + uint64_t BeliefExplorationSettings::getResolutionInit() const { + return this->getOption(resolutionOption).getArgumentByName("init").getValueAsUnsignedInteger(); + } + + double BeliefExplorationSettings::getResolutionFactor() const { + return this->getOption(resolutionOption).getArgumentByName("factor").getValueAsDouble(); + } + + uint64_t BeliefExplorationSettings::getSizeThresholdInit() const { + return this->getOption(sizeThresholdOption).getArgumentByName("init").getValueAsUnsignedInteger(); + } + + double BeliefExplorationSettings::getSizeThresholdFactor() const { + return this->getOption(sizeThresholdOption).getArgumentByName("factor").getValueAsDouble(); + } + + double BeliefExplorationSettings::getGapThresholdInit() const { + return this->getOption(gapThresholdOption).getArgumentByName("init").getValueAsDouble(); + } + + double BeliefExplorationSettings::getGapThresholdFactor() const { + return this->getOption(gapThresholdOption).getArgumentByName("factor").getValueAsDouble(); + } + + double BeliefExplorationSettings::getOptimalChoiceValueThresholdInit() const { + return this->getOption(schedulerThresholdOption).getArgumentByName("init").getValueAsDouble(); + } + + double BeliefExplorationSettings::getOptimalChoiceValueThresholdFactor() const { + return this->getOption(schedulerThresholdOption).getArgumentByName("factor").getValueAsDouble(); + } + + double BeliefExplorationSettings::getObservationScoreThresholdInit() const { + return this->getOption(observationThresholdOption).getArgumentByName("init").getValueAsDouble(); + } + + double BeliefExplorationSettings::getObservationScoreThresholdFactor() const { + return this->getOption(observationThresholdOption).getArgumentByName("factor").getValueAsDouble(); + } + + bool BeliefExplorationSettings::isNumericPrecisionSetFromDefault() const { + return !this->getOption(numericPrecisionOption).getHasOptionBeenSet() || this->getOption(numericPrecisionOption).getArgumentByName("value").wasSetFromDefaultValue(); + } + + double BeliefExplorationSettings::getNumericPrecision() const { + return this->getOption(numericPrecisionOption).getArgumentByName("value").getValueAsDouble(); + } + + bool BeliefExplorationSettings::isDynamicTriangulationModeSet() const { + return this->getOption(triangulationModeOption).getArgumentByName("value").getValueAsString() == "dynamic"; + + } + bool BeliefExplorationSettings::isStaticTriangulationModeSet() const { + return this->getOption(triangulationModeOption).getArgumentByName("value").getValueAsString() == "static"; + } + + template + void BeliefExplorationSettings::setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions& options) const { + options.refine = isRefineSet(); + options.refinePrecision = getRefinePrecision(); + if (isRefineStepLimitSet()) { + options.refineStepLimit = getRefineStepLimit(); + } else { + options.refineStepLimit = boost::none; + } + if (isExplorationTimeLimitSet()) { + options.explorationTimeLimit = getExplorationTimeLimit(); + } else { + options.explorationTimeLimit = boost::none; + } + options.resolutionInit = getResolutionInit(); + options.resolutionFactor = storm::utility::convertNumber(getResolutionFactor()); + options.sizeThresholdInit = getSizeThresholdInit(); + options.sizeThresholdFactor = storm::utility::convertNumber(getSizeThresholdFactor()); + options.gapThresholdInit = storm::utility::convertNumber(getGapThresholdInit()); + options.gapThresholdFactor = storm::utility::convertNumber(getGapThresholdFactor()); + options.optimalChoiceValueThresholdInit = storm::utility::convertNumber(getOptimalChoiceValueThresholdInit()); + options.optimalChoiceValueThresholdFactor = storm::utility::convertNumber(getOptimalChoiceValueThresholdFactor()); + options.obsThresholdInit = storm::utility::convertNumber(getObservationScoreThresholdInit()); + options.obsThresholdIncrementFactor = storm::utility::convertNumber(getObservationScoreThresholdFactor()); + + options.numericPrecision = getNumericPrecision(); + if (storm::NumberTraits::IsExact) { + if (isNumericPrecisionSetFromDefault()) { + STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmethic is used."); + options.numericPrecision = storm::utility::zero(); + } else { + STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "A non-zero numeric precision was set although exact arithmethic is used. Results might be inexact."); + } + } + options.dynamicTriangulation = isDynamicTriangulationModeSet(); + } + + template void BeliefExplorationSettings::setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions& options) const; + template void BeliefExplorationSettings::setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions& options) const; + + + + } // namespace modules + } // namespace settings +} // namespace storm diff --git a/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h new file mode 100644 index 000000000..82d1d6010 --- /dev/null +++ b/src/storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h @@ -0,0 +1,77 @@ +#pragma once + +#include "storm-config.h" +#include "storm/settings/modules/ModuleSettings.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + template + struct ApproximatePOMDPModelCheckerOptions; + } + } + + namespace settings { + namespace modules { + + /*! + * This class represents the settings for POMDP model checking. + */ + class BeliefExplorationSettings : public ModuleSettings { + public: + + /*! + * Creates a new set of POMDP settings. + */ + BeliefExplorationSettings(); + + virtual ~BeliefExplorationSettings() = default; + + bool isRefineSet() const; + double getRefinePrecision() const; + bool isRefineStepLimitSet() const; + uint64_t getRefineStepLimit() const; + + bool isExplorationTimeLimitSet() const; + uint64_t getExplorationTimeLimit() const; + + /// Discretization Resolution + uint64_t getResolutionInit() const; + double getResolutionFactor() const; + /// The maximal number of newly expanded MDP states in a refinement step + uint64_t getSizeThresholdInit() const; + double getSizeThresholdFactor() const; + + /// Controls how large the gap between known lower- and upper bounds at a beliefstate needs to be in order to explore + double getGapThresholdInit() const; + double getGapThresholdFactor() const; + + /// Controls whether "almost optimal" choices will be considered optimal + double getOptimalChoiceValueThresholdInit() const; + double getOptimalChoiceValueThresholdFactor() const; + + /// Controls which observations are refined. + double getObservationScoreThresholdInit() const; + double getObservationScoreThresholdFactor() const; + + /// Used to determine whether two beliefs are equal + bool isNumericPrecisionSetFromDefault() const; + double getNumericPrecision() const; + + bool isDynamicTriangulationModeSet() const; + bool isStaticTriangulationModeSet() const; + + template + void setValuesInOptionsStruct(storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions& options) const; + + // The name of the module. + static const std::string moduleName; + + private: + + + }; + + } // namespace modules + } // namespace settings +} // namespace storm diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index 2eb418dab..09a5342b3 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -13,7 +13,10 @@ namespace storm { namespace modules { const std::string POMDPSettings::moduleName = "pomdp"; + const std::string noCanonicOption = "nocanonic"; const std::string exportAsParametricModelOption = "parametric-drn"; + const std::string beliefExplorationOption = "belief-exploration"; + std::vector beliefExplorationModes = {"both", "discretize", "unfold"}; const std::string qualitativeReductionOption = "qualitativereduction"; const std::string analyzeUniqueObservationsOption = "uniqueobservations"; const std::string mecReductionOption = "mecreduction"; @@ -25,8 +28,12 @@ namespace storm { std::vector fscModes = {"standard", "simple-linear", "simple-linear-inverse"}; const std::string transformBinaryOption = "transformbinary"; const std::string transformSimpleOption = "transformsimple"; + const std::string memlessSearchOption = "memlesssearch"; + std::vector memlessSearchMethods = {"none", "ccdmemless", "ccdmemory", "iterative"}; + const std::string checkFullyObservableOption = "check-fully-observable"; POMDPSettings::POMDPSettings() : ModuleSettings(moduleName) { + this->addOption(storm::settings::OptionBuilder(moduleName, noCanonicOption, false, "If this is set, actions will not be ordered canonically. Could yield incorrect results.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, exportAsParametricModelOption, false, "Export the parametric file.").addArgument(storm::settings::ArgumentBuilder::createStringArgument("filename", "The name of the file to which to write the model.").build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, qualitativeReductionOption, false, "Reduces the model size by performing qualitative analysis (E.g. merge states with prob. 1.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, analyzeUniqueObservationsOption, false, "Computes the states with a unique observation").build()); @@ -37,6 +44,14 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, fscmode, false, "Sets the way the pMC is obtained").addArgument(storm::settings::ArgumentBuilder::createStringArgument("type", "type name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(fscModes)).setDefaultValueString("standard").build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformBinaryOption, false, "Transforms the pomdp to a binary pomdp.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, transformSimpleOption, false, "Transforms the pomdp to a binary and simple pomdp.").build()); + this->addOption(storm::settings::OptionBuilder(moduleName, beliefExplorationOption, false,"Analyze the POMDP by exploring the belief state-space.").addArgument(storm::settings::ArgumentBuilder::createStringArgument("mode", "Sets whether lower, upper, or interval result bounds are computed.").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(beliefExplorationModes)).setDefaultValueString("both").makeOptional().build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, memlessSearchOption, false, "Search for a qualitative memoryless scheuler").addArgument(storm::settings::ArgumentBuilder::createStringArgument("method", "method name").addValidatorString(ArgumentValidatorFactory::createMultipleChoiceValidator(memlessSearchMethods)).setDefaultValueString("none").build()).build()); + this->addOption(storm::settings::OptionBuilder(moduleName, checkFullyObservableOption, false, "Performs standard model checking on the underlying MDP").build()); + + } + + bool POMDPSettings::isNoCanonicSet() const { + return this->getOption(noCanonicOption).getHasOptionBeenSet(); } bool POMDPSettings::isExportToParametricSet() const { @@ -62,7 +77,33 @@ namespace storm { bool POMDPSettings::isSelfloopReductionSet() const { return this->getOption(selfloopReductionOption).getHasOptionBeenSet(); } + + bool POMDPSettings::isBeliefExplorationSet() const { + return this->getOption(beliefExplorationOption).getHasOptionBeenSet(); + } + bool POMDPSettings::isBeliefExplorationDiscretizeSet() const { + std::string arg = this->getOption(beliefExplorationOption).getArgumentByName("mode").getValueAsString(); + return isBeliefExplorationSet() && (arg == "discretize" || arg == "both"); + } + + bool POMDPSettings::isBeliefExplorationUnfoldSet() const { + std::string arg = this->getOption(beliefExplorationOption).getArgumentByName("mode").getValueAsString(); + return isBeliefExplorationSet() && (arg == "unfold" || arg == "both"); + } + + bool POMDPSettings::isMemlessSearchSet() const { + return this->getOption(memlessSearchOption).getHasOptionBeenSet(); + } + + bool POMDPSettings::isCheckFullyObservableSet() const { + return this->getOption(checkFullyObservableOption).getHasOptionBeenSet(); + } + + std::string POMDPSettings::getMemlessSearchMethod() const { + return this->getOption(memlessSearchOption).getArgumentByName("method").getValueAsString(); + } + uint64_t POMDPSettings::getMemoryBound() const { return this->getOption(memoryBoundOption).getArgumentByName("bound").getValueAsUnsignedInteger(); } diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h index 5b1d6cd9b..d1a9e6b82 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h @@ -25,13 +25,22 @@ namespace storm { std::string getExportToParametricFilename() const; bool isQualitativeReductionSet() const; + + bool isNoCanonicSet() const; + bool isBeliefExplorationSet() const; + bool isBeliefExplorationDiscretizeSet() const; + bool isBeliefExplorationUnfoldSet() const; bool isAnalyzeUniqueObservationsSet() const; bool isMecReductionSet() const; bool isSelfloopReductionSet() const; bool isTransformSimpleSet() const; bool isTransformBinarySet() const; + bool isMemlessSearchSet() const; + bool isCheckFullyObservableSet() const; + std::string getMemlessSearchMethod() const; std::string getFscApplicationTypeString() const; uint64_t getMemoryBound() const; + storm::storage::PomdpMemoryPattern getMemoryPattern() const; bool check() const override; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index ef0b4c38f..d2f139a58 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -3,36 +3,17 @@ #include "storm/utility/initialize.h" #include "storm/settings/modules/GeneralSettings.h" -#include "storm/settings/modules/CoreSettings.h" -#include "storm/settings/modules/IOSettings.h" #include "storm/settings/modules/DebugSettings.h" -#include "storm/settings/modules/CuddSettings.h" -#include "storm/settings/modules/SylvanSettings.h" -#include "storm/settings/modules/EigenEquationSolverSettings.h" -#include "storm/settings/modules/GmmxxEquationSolverSettings.h" -#include "storm/settings/modules/NativeEquationSolverSettings.h" -#include "storm/settings/modules/EliminationSettings.h" -#include "storm/settings/modules/MinMaxEquationSolverSettings.h" -#include "storm/settings/modules/GameSolverSettings.h" -#include "storm/settings/modules/BisimulationSettings.h" -#include "storm/settings/modules/GlpkSettings.h" -#include "storm/settings/modules/GurobiSettings.h" -#include "storm/settings/modules/Smt2SmtSolverSettings.h" -#include "storm/settings/modules/ExplorationSettings.h" -#include "storm/settings/modules/ResourceSettings.h" -#include "storm/settings/modules/AbstractionSettings.h" -#include "storm/settings/modules/BuildSettings.h" -#include "storm/settings/modules/JitBuilderSettings.h" -#include "storm/settings/modules/MultiObjectiveSettings.h" - -#include "storm/settings/modules/HintSettings.h" #include "storm-pomdp-cli/settings/modules/POMDPSettings.h" +#include "storm-pomdp-cli/settings/modules/BeliefExplorationSettings.h" +#include "storm-pomdp-cli/settings/PomdpSettings.h" #include "storm/analysis/GraphConditions.h" #include "storm-cli-utilities/cli.h" #include "storm-cli-utilities/model-handling.h" +#include "storm-pomdp/transformer/KnownProbabilityTransformer.h" #include "storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.h" #include "storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.h" #include "storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.h" @@ -41,192 +22,357 @@ #include "storm-pomdp/transformer/MakePOMDPCanonic.h" #include "storm-pomdp/analysis/UniqueObservationStates.h" #include "storm-pomdp/analysis/QualitativeAnalysis.h" -#include "storm/api/storm.h" - -/*! - * Initialize the settings manager. - */ -void initializeSettings() { - storm::settings::mutableManager().setName("Storm-POMDP", "storm-pomdp"); - - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - storm::settings::addModule(); - - storm::settings::addModule(); - storm::settings::addModule(); - - - - storm::settings::addModule(); -} - -/*! - * Entry point for the pomdp backend. - * - * @param argc The argc argument of main(). - * @param argv The argv argument of main(). - * @return Return code, 0 if successfull, not 0 otherwise. - */ -int main(const int argc, const char** argv) { - try { - storm::utility::setUp(); - storm::cli::printHeader("Storm-pomdp", argc, argv); - initializeSettings(); - - bool optionsCorrect = storm::cli::parseOptions(argc, argv); - if (!optionsCorrect) { - return -1; - } - storm::cli::setUrgentOptions(); +#include "storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h" +#include "storm-pomdp/analysis/FormulaInformation.h" +#include "storm-pomdp/analysis/MemlessStrategySearchQualitative.h" +#include "storm-pomdp/analysis/QualitativeStrategySearchNaive.h" +#include "storm/api/storm.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" +#include "storm/utility/NumberTraits.h" +#include "storm/utility/Stopwatch.h" +#include "storm/utility/SignalHandler.h" +#include "storm/utility/NumberTraits.h" +#include "storm/exceptions/UnexpectedException.h" +#include "storm/exceptions/NotSupportedException.h" - auto const& pomdpSettings = storm::settings::getModule(); +#include - auto symbolicInput = storm::cli::parseSymbolicInput(); - storm::cli::ModelProcessingInformation mpi; - std::tie(symbolicInput, mpi) = storm::cli::preprocessSymbolicInput(symbolicInput); - - // We should not export here if we are going to do some processing first. - auto model = storm::cli::buildPreprocessExportModelWithValueTypeAndDdlib(symbolicInput, mpi); - STORM_LOG_THROW(model && model->getType() == storm::models::ModelType::Pomdp, storm::exceptions::WrongFormatException, "Expected a POMDP."); - std::shared_ptr> pomdp = model->template as>(); - storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); - pomdp = makeCanonic.transform(); - - std::shared_ptr formula; - if (!symbolicInput.properties.empty()) { - formula = symbolicInput.properties.front().getRawFormula(); - STORM_PRINT_AND_LOG("Analyzing property '" << *formula << "'" << std::endl); - STORM_LOG_WARN_COND(symbolicInput.properties.size() == 1, "There is currently no support for multiple properties. All other properties will be ignored."); - } - - if (pomdpSettings.isAnalyzeUniqueObservationsSet()) { - STORM_PRINT_AND_LOG("Analyzing states with unique observation ..." << std::endl); - storm::analysis::UniqueObservationStates uniqueAnalysis(*pomdp); - std::cout << uniqueAnalysis.analyse() << std::endl; - } - - if (formula) { - if (formula->isProbabilityOperatorFormula()) { - if (pomdpSettings.isSelfloopReductionSet() && !storm::solver::minimize(formula->asProbabilityOperatorFormula().getOptimalityType())) { - STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); - uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); - pomdp = selfLoopEliminator.transform(); - STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); +namespace storm { + namespace pomdp { + namespace cli { + + /// Perform preprocessings based on the graph structure (if requested or necessary). Return true, if some preprocessing has been done + template + bool performPreprocessing(std::shared_ptr>& pomdp, storm::pomdp::analysis::FormulaInformation& formulaInfo, storm::logic::Formula const& formula) { + auto const& pomdpSettings = storm::settings::getModule(); + bool preprocessingPerformed = false; + if (pomdpSettings.isSelfloopReductionSet()) { + bool apply = formulaInfo.isNonNestedReachabilityProbability() && formulaInfo.maximize(); + apply = apply || (formulaInfo.isNonNestedExpectedRewardFormula() && formulaInfo.minimize()); + if (apply) { + STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); + uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); + storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); + pomdp = selfLoopEliminator.transform(); + STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); + preprocessingPerformed = true; + } } - if (pomdpSettings.isQualitativeReductionSet()) { - storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); + if (pomdpSettings.isQualitativeReductionSet() && formulaInfo.isNonNestedReachabilityProbability()) { + storm::analysis::QualitativeAnalysis qualitativeAnalysis(*pomdp); STORM_PRINT_AND_LOG("Computing states with probability 0 ..."); - std::cout << qualitativeAnalysis.analyseProb0(formula->asProbabilityOperatorFormula()) << std::endl; + storm::storage::BitVector prob0States = qualitativeAnalysis.analyseProb0(formula.asProbabilityOperatorFormula()); + std::cout << prob0States << std::endl; STORM_PRINT_AND_LOG(" done." << std::endl); STORM_PRINT_AND_LOG("Computing states with probability 1 ..."); - std::cout << qualitativeAnalysis.analyseProb1(formula->asProbabilityOperatorFormula()) << std::endl; + storm::storage::BitVector prob1States = qualitativeAnalysis.analyseProb1(formula.asProbabilityOperatorFormula()); + std::cout << prob1States << std::endl; STORM_PRINT_AND_LOG(" done." << std::endl); - std::cout << "actual reduction not yet implemented..." << std::endl; + storm::pomdp::transformer::KnownProbabilityTransformer kpt = storm::pomdp::transformer::KnownProbabilityTransformer(); + pomdp = kpt.transform(*pomdp, prob0States, prob1States); + // Update formulaInfo to changes from Preprocessing + formulaInfo.updateTargetStates(*pomdp, std::move(prob1States)); + formulaInfo.updateSinkStates(*pomdp, std::move(prob0States)); + preprocessingPerformed = true; } - } else if (formula->isRewardOperatorFormula()) { - if (pomdpSettings.isSelfloopReductionSet() && storm::solver::minimize(formula->asRewardOperatorFormula().getOptimalityType())) { - STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); - uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); - pomdp = selfLoopEliminator.transform(); - STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); + return preprocessingPerformed; + } + + template + void printResult(ValueType const& lowerBound, ValueType const& upperBound) { + if (lowerBound == upperBound) { + if (storm::utility::isInfinity(lowerBound)) { + STORM_PRINT_AND_LOG("inf"); + } else { + STORM_PRINT_AND_LOG(lowerBound); + } + } else if (storm::utility::isInfinity(-lowerBound)) { + if (storm::utility::isInfinity(upperBound)) { + STORM_PRINT_AND_LOG("[-inf, inf] (width=inf)"); + } else { + // Only upper bound is known + STORM_PRINT_AND_LOG("≤ " << upperBound); + } + } else if (storm::utility::isInfinity(upperBound)) { + STORM_PRINT_AND_LOG("≥ " << lowerBound); + } else { + STORM_PRINT_AND_LOG("[" << lowerBound << ", " << upperBound << "] (width=" << ValueType(upperBound - lowerBound) << ")"); + } + if (storm::NumberTraits::IsExact) { + STORM_PRINT_AND_LOG(" (approx. "); + double roundedLowerBound = storm::utility::isInfinity(-lowerBound) ? -storm::utility::infinity() : storm::utility::convertNumber(lowerBound); + double roundedUpperBound = storm::utility::isInfinity(upperBound) ? storm::utility::infinity() : storm::utility::convertNumber(upperBound); + printResult(roundedLowerBound, roundedUpperBound); + STORM_PRINT_AND_LOG(")"); } } - if (pomdpSettings.getMemoryBound() > 1) { - STORM_PRINT_AND_LOG("Computing the unfolding for memory bound " << pomdpSettings.getMemoryBound() << " and memory pattern '" << storm::storage::toString(pomdpSettings.getMemoryPattern()) << "' ..."); - storm::storage::PomdpMemory memory = storm::storage::PomdpMemoryBuilder().build(pomdpSettings.getMemoryPattern(), pomdpSettings.getMemoryBound()); - std::cout << memory.toString() << std::endl; - storm::transformer::PomdpMemoryUnfolder memoryUnfolder(*pomdp, memory); - pomdp = memoryUnfolder.transform(); - STORM_PRINT_AND_LOG(" done." << std::endl); - pomdp->printModelInformationToStream(std::cout); - } else { - STORM_PRINT_AND_LOG("Assumming memoryless schedulers." << std::endl;) + + template + bool performAnalysis(std::shared_ptr> const& pomdp, storm::pomdp::analysis::FormulaInformation const& formulaInfo, storm::logic::Formula const& formula) { + auto const& pomdpSettings = storm::settings::getModule(); + bool analysisPerformed = false; + if (pomdpSettings.isBeliefExplorationSet()) { + STORM_PRINT_AND_LOG("Exploring the belief MDP... "); + auto options = storm::pomdp::modelchecker::ApproximatePOMDPModelCheckerOptions(pomdpSettings.isBeliefExplorationDiscretizeSet(), pomdpSettings.isBeliefExplorationUnfoldSet()); + auto const& beliefExplorationSettings = storm::settings::getModule(); + beliefExplorationSettings.setValuesInOptionsStruct(options); + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker> checker(*pomdp, options); + auto result = checker.check(formula); + checker.printStatisticsToStream(std::cout); + if (storm::utility::resources::isTerminate()) { + STORM_PRINT_AND_LOG("\nResult till abort: ") + } else { + STORM_PRINT_AND_LOG("\nResult: ") + } + printResult(result.lowerBound, result.upperBound); + STORM_PRINT_AND_LOG(std::endl); + analysisPerformed = true; + } + if (pomdpSettings.isMemlessSearchSet()) { + STORM_LOG_THROW(formulaInfo.isNonNestedReachabilityProbability(), storm::exceptions::NotSupportedException, "Qualitative memoryless scheduler search is not implemented for this property type."); + + // std::cout << std::endl; + // pomdp->writeDotToStream(std::cout); + // std::cout << std::endl; + // std::cout << std::endl; + storm::expressions::ExpressionManager expressionManager; + std::shared_ptr smtSolverFactory = std::make_shared(); + if (pomdpSettings.getMemlessSearchMethod() == "ccd16memless") { + storm::pomdp::QualitativeStrategySearchNaive memlessSearch(*pomdp, formulaInfo.getTargetStates().observations, formulaInfo.getTargetStates().states, formulaInfo.getSinkStates().states, smtSolverFactory); + memlessSearch.findNewStrategyForSomeState(5); + } else if (pomdpSettings.getMemlessSearchMethod() == "iterative") { + storm::pomdp::MemlessStrategySearchQualitative memlessSearch(*pomdp, formulaInfo.getTargetStates().observations, formulaInfo.getTargetStates().states, formulaInfo.getSinkStates().states, smtSolverFactory); + memlessSearch.findNewStrategyForSomeState(5); + } else { + STORM_LOG_ERROR("This method is not implemented."); + } + analysisPerformed = true; + } + if (pomdpSettings.isCheckFullyObservableSet()) { + STORM_PRINT_AND_LOG("Analyzing the formula on the fully observable MDP ... "); + auto resultPtr = storm::api::verifyWithSparseEngine(pomdp->template as>(), storm::api::createTask(formula.asSharedPointer(), true)); + if (resultPtr) { + auto result = resultPtr->template asExplicitQuantitativeCheckResult(); + result.filter(storm::modelchecker::ExplicitQualitativeCheckResult(pomdp->getInitialStates())); + if (storm::utility::resources::isTerminate()) { + STORM_PRINT_AND_LOG("\nResult till abort: ") + } else { + STORM_PRINT_AND_LOG("\nResult: ") + } + printResult(result.getMin(), result.getMax()); + STORM_PRINT_AND_LOG(std::endl); + } else { + STORM_PRINT_AND_LOG("\nResult: Not available." << std::endl); + } + analysisPerformed = true; + } + return analysisPerformed; } - // From now on the pomdp is considered memoryless - if (pomdpSettings.isMecReductionSet()) { - STORM_PRINT_AND_LOG("Eliminating mec choices ..."); - // Note: Elimination of mec choices only preserves memoryless schedulers. - uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); - storm::transformer::GlobalPomdpMecChoiceEliminator mecChoiceEliminator(*pomdp); - pomdp = mecChoiceEliminator.transform(*formula); - STORM_PRINT_AND_LOG(" done." << std::endl); - STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through MEC choice elimination." << std::endl); - pomdp->printModelInformationToStream(std::cout); + template + bool performTransformation(std::shared_ptr>& pomdp, storm::logic::Formula const& formula) { + auto const& pomdpSettings = storm::settings::getModule(); + bool transformationPerformed = false; + bool memoryUnfolded = false; + if (pomdpSettings.getMemoryBound() > 1) { + STORM_PRINT_AND_LOG("Computing the unfolding for memory bound " << pomdpSettings.getMemoryBound() << " and memory pattern '" << storm::storage::toString(pomdpSettings.getMemoryPattern()) << "' ..."); + storm::storage::PomdpMemory memory = storm::storage::PomdpMemoryBuilder().build(pomdpSettings.getMemoryPattern(), pomdpSettings.getMemoryBound()); + std::cout << memory.toString() << std::endl; + storm::transformer::PomdpMemoryUnfolder memoryUnfolder(*pomdp, memory); + pomdp = memoryUnfolder.transform(); + STORM_PRINT_AND_LOG(" done." << std::endl); + pomdp->printModelInformationToStream(std::cout); + transformationPerformed = true; + memoryUnfolded = true; + } + + // From now on the pomdp is considered memoryless + + if (pomdpSettings.isMecReductionSet()) { + STORM_PRINT_AND_LOG("Eliminating mec choices ..."); + // Note: Elimination of mec choices only preserves memoryless schedulers. + uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); + storm::transformer::GlobalPomdpMecChoiceEliminator mecChoiceEliminator(*pomdp); + pomdp = mecChoiceEliminator.transform(formula); + STORM_PRINT_AND_LOG(" done." << std::endl); + STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through MEC choice elimination." << std::endl); + pomdp->printModelInformationToStream(std::cout); + transformationPerformed = true; + } + + if (pomdpSettings.isTransformBinarySet() || pomdpSettings.isTransformSimpleSet()) { + if (pomdpSettings.isTransformSimpleSet()) { + STORM_PRINT_AND_LOG("Transforming the POMDP to a simple POMDP."); + pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, true); + } else { + STORM_PRINT_AND_LOG("Transforming the POMDP to a binary POMDP."); + pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, false); + } + pomdp->printModelInformationToStream(std::cout); + STORM_PRINT_AND_LOG(" done." << std::endl); + transformationPerformed = true; + } + + if (pomdpSettings.isExportToParametricSet()) { + STORM_PRINT_AND_LOG("Transforming memoryless POMDP to pMC..."); + storm::transformer::ApplyFiniteSchedulerToPomdp toPMCTransformer(*pomdp); + std::string transformMode = pomdpSettings.getFscApplicationTypeString(); + auto pmc = toPMCTransformer.transform(storm::transformer::parsePomdpFscApplicationMode(transformMode)); + STORM_PRINT_AND_LOG(" done." << std::endl); + pmc->printModelInformationToStream(std::cout); + STORM_PRINT_AND_LOG("Simplifying pMC..."); + //if (generalSettings.isBisimulationSet()) { + pmc = storm::api::performBisimulationMinimization(pmc->template as>(),{formula.asSharedPointer()}, storm::storage::BisimulationType::Strong)->template as>(); + + //} + STORM_PRINT_AND_LOG(" done." << std::endl); + pmc->printModelInformationToStream(std::cout); + STORM_PRINT_AND_LOG("Exporting pMC..."); + storm::analysis::ConstraintCollector constraints(*pmc); + auto const& parameterSet = constraints.getVariables(); + std::vector parameters(parameterSet.begin(), parameterSet.end()); + std::vector parameterNames; + for (auto const& parameter : parameters) { + parameterNames.push_back(parameter.name()); + } + storm::api::exportSparseModelAsDrn(pmc, pomdpSettings.getExportToParametricFilename(), parameterNames); + STORM_PRINT_AND_LOG(" done." << std::endl); + transformationPerformed = true; + } + if (transformationPerformed && !memoryUnfolded) { + STORM_PRINT_AND_LOG("Implicitly assumed restriction to memoryless schedulers for at least one transformation." << std::endl); + } + return transformationPerformed; } - if (pomdpSettings.isTransformBinarySet() || pomdpSettings.isTransformSimpleSet()) { - if (pomdpSettings.isTransformSimpleSet()) { - STORM_PRINT_AND_LOG("Transforming the POMDP to a simple POMDP."); - pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, true); + template + void processOptionsWithValueTypeAndDdLib(storm::cli::SymbolicInput const& symbolicInput, storm::cli::ModelProcessingInformation const& mpi) { + auto const& pomdpSettings = storm::settings::getModule(); + + auto model = storm::cli::buildPreprocessExportModelWithValueTypeAndDdlib(symbolicInput, mpi); + if (!model) { + STORM_PRINT_AND_LOG("No input model given." << std::endl); + return; + } + STORM_LOG_THROW(model->getType() == storm::models::ModelType::Pomdp && model->isSparseModel(), storm::exceptions::WrongFormatException, "Expected a POMDP in sparse representation."); + + std::shared_ptr> pomdp = model->template as>(); + if (!pomdpSettings.isNoCanonicSet()) { + storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); + pomdp = makeCanonic.transform(); + } + + std::shared_ptr formula; + if (!symbolicInput.properties.empty()) { + formula = symbolicInput.properties.front().getRawFormula(); + STORM_PRINT_AND_LOG("Analyzing property '" << *formula << "'" << std::endl); + STORM_LOG_WARN_COND(symbolicInput.properties.size() == 1, "There is currently no support for multiple properties. All other properties will be ignored."); + } + + if (pomdpSettings.isAnalyzeUniqueObservationsSet()) { + STORM_PRINT_AND_LOG("Analyzing states with unique observation ..." << std::endl); + storm::analysis::UniqueObservationStates uniqueAnalysis(*pomdp); + std::cout << uniqueAnalysis.analyse() << std::endl; + } + + if (formula) { + auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(*pomdp, *formula); + STORM_LOG_THROW(!formulaInfo.isUnsupported(), storm::exceptions::InvalidPropertyException, "The formula '" << *formula << "' is not supported by storm-pomdp."); + + storm::utility::Stopwatch sw(true); + // Note that formulaInfo contains state-based information which potentially needs to be updated during preprocessing + if (performPreprocessing(pomdp, formulaInfo, *formula)) { + sw.stop(); + STORM_PRINT_AND_LOG("Time for graph-based POMDP (pre-)processing: " << sw << "." << std::endl); + pomdp->printModelInformationToStream(std::cout); + } + + sw.restart(); + if (performAnalysis(pomdp, formulaInfo, *formula)) { + sw.stop(); + STORM_PRINT_AND_LOG("Time for POMDP analysis: " << sw << "." << std::endl); + } + + sw.restart(); + if (performTransformation(pomdp, *formula)) { + sw.stop(); + STORM_PRINT_AND_LOG("Time for POMDP transformation(s): " << sw << "." << std::endl); + } } else { - STORM_PRINT_AND_LOG("Transforming the POMDP to a binary POMDP."); - pomdp = storm::transformer::BinaryPomdpTransformer().transform(*pomdp, false); + STORM_LOG_WARN("Nothing to be done. Did you forget to specify a formula?"); } - pomdp->printModelInformationToStream(std::cout); - STORM_PRINT_AND_LOG(" done." << std::endl); + } - - if (pomdpSettings.isExportToParametricSet()) { - STORM_PRINT_AND_LOG("Transforming memoryless POMDP to pMC..."); - storm::transformer::ApplyFiniteSchedulerToPomdp toPMCTransformer(*pomdp); - std::string transformMode = pomdpSettings.getFscApplicationTypeString(); - auto pmc = toPMCTransformer.transform(storm::transformer::parsePomdpFscApplicationMode(transformMode)); - STORM_PRINT_AND_LOG(" done." << std::endl); - pmc->printModelInformationToStream(std::cout); - STORM_PRINT_AND_LOG("Simplifying pMC..."); - //if (generalSettings.isBisimulationSet()) { - pmc = storm::api::performBisimulationMinimization(pmc->as>(),{formula}, storm::storage::BisimulationType::Strong)->as>(); - - //} - STORM_PRINT_AND_LOG(" done." << std::endl); - pmc->printModelInformationToStream(std::cout); - STORM_PRINT_AND_LOG("Exporting pMC..."); - storm::analysis::ConstraintCollector constraints(*pmc); - auto const& parameterSet = constraints.getVariables(); - std::vector parameters(parameterSet.begin(), parameterSet.end()); - std::vector parameterNames; - for (auto const& parameter : parameters) { - parameterNames.push_back(parameter.name()); - } - storm::api::exportSparseModelAsDrn(pmc, pomdpSettings.getExportToParametricFilename(), parameterNames); - STORM_PRINT_AND_LOG(" done." << std::endl); + template + void processOptionsWithDdLib(storm::cli::SymbolicInput const& symbolicInput, storm::cli::ModelProcessingInformation const& mpi) { + STORM_LOG_ERROR_COND(mpi.buildValueType == mpi.verificationValueType, "Build value type differs from verification value type. Will ignore Verification value type."); + switch (mpi.buildValueType) { + case storm::cli::ModelProcessingInformation::ValueType::FinitePrecision: + processOptionsWithValueTypeAndDdLib(symbolicInput, mpi); + break; + case storm::cli::ModelProcessingInformation::ValueType::Exact: + STORM_LOG_THROW(DdType == storm::dd::DdType::Sylvan, storm::exceptions::UnexpectedException, "Exact arithmetic is only supported with Dd library Sylvan."); + processOptionsWithValueTypeAndDdLib(symbolicInput, mpi); + break; + default: + STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "Unexpected ValueType for model building."); + } + } + + void processOptions() { + auto symbolicInput = storm::cli::parseSymbolicInput(); + storm::cli::ModelProcessingInformation mpi; + std::tie(symbolicInput, mpi) = storm::cli::preprocessSymbolicInput(symbolicInput); + switch (mpi.ddType) { + case storm::dd::DdType::CUDD: + processOptionsWithDdLib(symbolicInput, mpi); + break; + case storm::dd::DdType::Sylvan: + processOptionsWithDdLib(symbolicInput, mpi); + break; + default: + STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "Unexpected Dd Type."); + } } - - } else { - STORM_LOG_WARN("Nothing to be done. Did you forget to specify a formula?"); } + } +} + +/*! + * Entry point for the pomdp backend. + * + * @param argc The argc argument of main(). + * @param argv The argv argument of main(). + * @return Return code, 0 if successfull, not 0 otherwise. + */ +int main(const int argc, const char** argv) { + //try { + storm::utility::setUp(); + storm::cli::printHeader("Storm-pomdp", argc, argv); + storm::settings::initializePomdpSettings("Storm-POMDP", "storm-pomdp"); + bool optionsCorrect = storm::cli::parseOptions(argc, argv); + if (!optionsCorrect) { + return -1; + } + storm::cli::setUrgentOptions(); + // Invoke storm-pomdp with obtained settings + storm::pomdp::cli::processOptions(); // All operations have now been performed, so we clean up everything and terminate. storm::utility::cleanUp(); return 0; - } catch (storm::exceptions::BaseException const &exception) { - STORM_LOG_ERROR("An exception caused Storm-pomdp to terminate. The message of the exception is: " << exception.what()); - return 1; - } catch (std::exception const &exception) { - STORM_LOG_ERROR("An unexpected exception occurred and caused Storm-pomdp to terminate. The message of this exception is: " << exception.what()); - return 2; - } + // } catch (storm::exceptions::BaseException const &exception) { + // STORM_LOG_ERROR("An exception caused Storm-pomdp to terminate. The message of the exception is: " << exception.what()); + // return 1; + //} catch (std::exception const &exception) { + // STORM_LOG_ERROR("An unexpected exception occurred and caused Storm-pomdp to terminate. The message of this exception is: " << exception.what()); + // return 2; + //} } diff --git a/src/storm-pomdp/CMakeLists.txt b/src/storm-pomdp/CMakeLists.txt index 53643b738..37da8b794 100644 --- a/src/storm-pomdp/CMakeLists.txt +++ b/src/storm-pomdp/CMakeLists.txt @@ -17,7 +17,7 @@ set_target_properties(storm-pomdp PROPERTIES DEFINE_SYMBOL "") list(APPEND STORM_TARGETS storm-pomdp) set(STORM_TARGETS ${STORM_TARGETS} PARENT_SCOPE) -target_link_libraries(storm-pomdp PUBLIC storm ${STORM_POMDP_LINK_LIBRARIES}) +target_link_libraries(storm-pomdp PUBLIC storm storm-parsers ${STORM_POMDP_LINK_LIBRARIES}) # Install storm headers to include directory. foreach(HEADER ${STORM_POMDP_HEADERS}) diff --git a/src/storm-pomdp/analysis/FiniteBeliefMdpDetection.h b/src/storm-pomdp/analysis/FiniteBeliefMdpDetection.h new file mode 100644 index 000000000..e28b3a339 --- /dev/null +++ b/src/storm-pomdp/analysis/FiniteBeliefMdpDetection.h @@ -0,0 +1,59 @@ +#pragma once + +#include + +#include "storm/models/sparse/Pomdp.h" +#include "storm/storage/BitVector.h" +#include "storm/storage/StronglyConnectedComponentDecomposition.h" + +namespace storm { + namespace pomdp { + + /*! + * This method tries to detect that the beliefmdp is finite. + * If this returns true, the beliefmdp is certainly finite. + * However, if this returns false, the beliefmdp might still be finite + * It is assumed that the belief MDP is not further explored when reaching a targetstate + */ + template + bool detectFiniteBeliefMdp(storm::models::sparse::Pomdp const& pomdp, boost::optional const& targetStates) { + // All infinite paths of the POMDP (including the ones with prob. 0 ) either + // - reach a target state after finitely many steps or + // - after finitely many steps enter an SCC and do not leave it + // Hence, any path of the belief MDP will at some point either reach a target state or stay in a set of POMDP SCCs. + // Only in the latter case we can get infinitely many different belief states. + // Below, we check whether all SCCs only consist of Dirac distributions. + // If this is the case, no new belief states will be found at some point. + + // Get the SCC decomposition + storm::storage::StronglyConnectedComponentDecompositionOptions options; + options.dropNaiveSccs(); + storm::storage::BitVector relevantStates; + if (targetStates) { + relevantStates = ~targetStates.get(); + options.subsystem(&relevantStates); + } + storm::storage::StronglyConnectedComponentDecomposition sccs(pomdp.getTransitionMatrix(), options); + + // Check whether all choices that stay within an SCC have Dirac distributions + for (auto const& scc : sccs) { + for (auto const& sccState : scc) { + for (uint64_t rowIndex = pomdp.getNondeterministicChoiceIndices()[sccState]; rowIndex < pomdp.getNondeterministicChoiceIndices()[sccState + 1]; ++rowIndex) { + for (auto const& entry : pomdp.getTransitionMatrix().getRow(rowIndex)) { + if (!storm::utility::isOne(entry.getValue()) && !storm::utility::isZero(entry.getValue())) { + if (scc.containsState(entry.getColumn())) { + // There is a non-dirac choice that stays in the SCC. + // This could still mean that the belief MDP is finite + // e.g., if at some point the branches merge back to the same state + return false; + } + } + } + } + } + } + + return true; + } + } +} \ No newline at end of file diff --git a/src/storm-pomdp/analysis/FormulaInformation.cpp b/src/storm-pomdp/analysis/FormulaInformation.cpp new file mode 100644 index 000000000..d648c6ee8 --- /dev/null +++ b/src/storm-pomdp/analysis/FormulaInformation.cpp @@ -0,0 +1,181 @@ +#include "storm-pomdp/analysis/FormulaInformation.h" +#include "storm/logic/Formulas.h" +#include "storm/logic/FragmentSpecification.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/models/sparse/StandardRewardModel.h" +#include "storm/modelchecker/propositional/SparsePropositionalModelChecker.h" +#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" + +#include "storm/utility/macros.h" +#include "storm/exceptions/InvalidPropertyException.h" + +namespace storm { + namespace pomdp { + namespace analysis { + + bool FormulaInformation::StateSet::empty() const { + STORM_LOG_ASSERT(states.empty() == observations.empty(), "Inconsistent StateSet."); + return observations.empty(); + } + + FormulaInformation::FormulaInformation() : type(Type::Unsupported) { + // Intentionally left empty + } + + FormulaInformation::FormulaInformation(Type const& type, storm::solver::OptimizationDirection const& dir, boost::optional const& rewardModelName) : type(type), optimizationDirection(dir), rewardModelName(rewardModelName) { + STORM_LOG_ASSERT(!this->rewardModelName.is_initialized() || this->type == Type::NonNestedExpectedRewardFormula, "Got a reward model name for a non-reward formula."); + + } + + FormulaInformation::Type const& FormulaInformation::getType() const { + return type; + } + + bool FormulaInformation::isNonNestedReachabilityProbability() const { + return type == Type::NonNestedReachabilityProbability; + } + + bool FormulaInformation::isNonNestedExpectedRewardFormula() const { + return type == Type::NonNestedExpectedRewardFormula; + } + + bool FormulaInformation::isUnsupported() const { + return type == Type::Unsupported; + } + + typename FormulaInformation::StateSet const& FormulaInformation::getTargetStates() const { + STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula || this->type == Type::NonNestedReachabilityProbability, "Target states requested for unexpected formula type."); + return targetStates.get(); + } + + typename FormulaInformation::StateSet const& FormulaInformation::getSinkStates() const { + STORM_LOG_ASSERT(this->type == Type::NonNestedReachabilityProbability, "Sink states requested for unexpected formula type."); + return sinkStates.get(); + } + + std::string const& FormulaInformation::getRewardModelName() const { + STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula, "Reward model requested for unexpected formula type."); + return rewardModelName.get(); + } + + storm::solver::OptimizationDirection const& FormulaInformation::getOptimizationDirection() const { + return optimizationDirection; + } + + bool FormulaInformation::minimize() const { + return storm::solver::minimize(optimizationDirection); + } + + bool FormulaInformation::maximize() const { + return storm::solver::maximize(optimizationDirection); + } + + template + FormulaInformation::StateSet getStateSet(PomdpType const& pomdp, storm::storage::BitVector&& inputStates) { + FormulaInformation::StateSet result; + result.states = std::move(inputStates); + for (auto const& state : result.states) { + result.observations.insert(pomdp.getObservation(state)); + } + // check if this set is observation-closed, i.e., whether there is a state outside of this set with one of the observations collected above + result.observationClosed = true; + for (uint64_t state = result.states.getNextUnsetIndex(0); state < result.states.size(); state = result.states.getNextUnsetIndex(state + 1)) { + if (result.observations.count(pomdp.getObservation(state)) > 0) { + result.observationClosed = false; + break; + } + } + return result; + } + + template + void FormulaInformation::updateTargetStates(PomdpType const& pomdp, storm::storage::BitVector&& newTargetStates) { + STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula || this->type == Type::NonNestedReachabilityProbability, "Target states updated for unexpected formula type."); + targetStates = getStateSet(pomdp, std::move(newTargetStates)); + } + + template + void FormulaInformation::updateSinkStates(PomdpType const& pomdp, storm::storage::BitVector&& newSinkStates) { + STORM_LOG_ASSERT(this->type == Type::NonNestedReachabilityProbability, "Sink states requested for unexpected formula type."); + sinkStates = getStateSet(pomdp, std::move(newSinkStates)); + } + + template + storm::storage::BitVector getStates(storm::logic::Formula const& propositionalFormula, bool formulaInverted, PomdpType const& pomdp) { + storm::modelchecker::SparsePropositionalModelChecker mc(pomdp); + auto checkResult = mc.check(propositionalFormula); + storm::storage::BitVector resultBitVector(checkResult->asExplicitQualitativeCheckResult().getTruthValuesVector()); + if (formulaInverted) { + resultBitVector.complement(); + } + return resultBitVector; + } + + template + FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::ProbabilityOperatorFormula const& formula) { + STORM_LOG_THROW(formula.hasOptimalityType(), storm::exceptions::InvalidPropertyException, "The property does not specify an optimization direction (min/max)"); + STORM_LOG_WARN_COND(!formula.hasBound(), "The probability threshold for the given property will be ignored."); + auto const& subformula = formula.getSubformula(); + std::shared_ptr targetStatesFormula, constraintsStatesFormula; + if (subformula.isEventuallyFormula()) { + targetStatesFormula = subformula.asEventuallyFormula().getSubformula().asSharedPointer(); + constraintsStatesFormula = storm::logic::Formula::getTrueFormula()->asSharedPointer(); + } else if (subformula.isUntilFormula()) { + storm::logic::UntilFormula const &untilFormula = subformula.asUntilFormula(); + targetStatesFormula = untilFormula.getRightSubformula().asSharedPointer(); + constraintsStatesFormula = untilFormula.getLeftSubformula().asSharedPointer(); + } + if (targetStatesFormula && targetStatesFormula->isInFragment(storm::logic::propositional()) && constraintsStatesFormula && constraintsStatesFormula->isInFragment(storm::logic::propositional())) { + FormulaInformation result(FormulaInformation::Type::NonNestedReachabilityProbability, formula.getOptimalityType()); + result.updateTargetStates(pomdp, getStates(*targetStatesFormula, false, pomdp)); + result.updateSinkStates(pomdp, getStates(*constraintsStatesFormula, true, pomdp)); + return result; + } + return FormulaInformation(); + } + + template + FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::RewardOperatorFormula const& formula) { + STORM_LOG_THROW(formula.hasOptimalityType(), storm::exceptions::InvalidPropertyException, "The property does not specify an optimization direction (min/max)"); + STORM_LOG_WARN_COND(!formula.hasBound(), "The reward threshold for the given property will be ignored."); + std::string rewardModelName = ""; + if (formula.hasRewardModelName()) { + rewardModelName = formula.getRewardModelName(); + STORM_LOG_THROW(pomdp.hasRewardModel(rewardModelName), storm::exceptions::InvalidPropertyException, "Selected reward model with name '" << rewardModelName << "' does not exist."); + } else { + STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::InvalidPropertyException, "Reward operator formula does not specify a reward model and the reward model is not unique."); + rewardModelName = pomdp.getUniqueRewardModelName(); + } + auto const& subformula = formula.getSubformula(); + std::shared_ptr targetStatesFormula; + if (subformula.isEventuallyFormula()) { + targetStatesFormula = subformula.asEventuallyFormula().getSubformula().asSharedPointer(); + } + if (targetStatesFormula && targetStatesFormula->isInFragment(storm::logic::propositional())) { + FormulaInformation result(FormulaInformation::Type::NonNestedExpectedRewardFormula, formula.getOptimalityType(), rewardModelName); + result.updateTargetStates(pomdp, getStates(*targetStatesFormula, false, pomdp)); + return result; + } + return FormulaInformation(); + } + + template + FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::Formula const& formula) { + if (formula.isProbabilityOperatorFormula()) { + return getFormulaInformation(pomdp, formula.asProbabilityOperatorFormula()); + } else if (formula.isRewardOperatorFormula()) { + return getFormulaInformation(pomdp, formula.asRewardOperatorFormula()); + } + return FormulaInformation(); + } + + template void FormulaInformation::updateTargetStates>(storm::models::sparse::Pomdp const& pomdp, storm::storage::BitVector&& newTargetStates); + template void FormulaInformation::updateSinkStates>(storm::models::sparse::Pomdp const& pomdp, storm::storage::BitVector&& newSinkStates); + template FormulaInformation getFormulaInformation>(storm::models::sparse::Pomdp const& pomdp, storm::logic::Formula const& formula); + template void FormulaInformation::updateTargetStates>(storm::models::sparse::Pomdp const& pomdp, storm::storage::BitVector&& newTargetStates); + template void FormulaInformation::updateSinkStates>(storm::models::sparse::Pomdp const& pomdp, storm::storage::BitVector&& newSinkStates); + template FormulaInformation getFormulaInformation>(storm::models::sparse::Pomdp const& pomdp, storm::logic::Formula const& formula); + + } + } +} diff --git a/src/storm-pomdp/analysis/FormulaInformation.h b/src/storm-pomdp/analysis/FormulaInformation.h new file mode 100644 index 000000000..10a92ab02 --- /dev/null +++ b/src/storm-pomdp/analysis/FormulaInformation.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include + +#include "storm/storage/BitVector.h" +#include "storm/solver/OptimizationDirection.h" + +namespace storm { + + namespace logic { + class Formula; + } + + namespace pomdp { + namespace analysis { + class FormulaInformation { + public: + /// Characterizes a certain set of states + struct StateSet { + storm::storage::BitVector states; // The set of states + std::set observations; // The set of the observations that are assigned to at least one state of the set + bool observationClosed; // True iff this state set can be uniquely characterized by the observations + bool empty() const; + }; + + /// Possible supported formula types + enum class Type { + NonNestedReachabilityProbability, // e.g. 'Pmax=? [F "target"]' or 'Pmin=? [!"sink" U "target"]' + NonNestedExpectedRewardFormula, // e.g. 'Rmin=? [F x>0 ]' + Unsupported // The formula type is unsupported + }; + + FormulaInformation(); // Unsupported + FormulaInformation(Type const& type, storm::solver::OptimizationDirection const& dir, boost::optional const& rewardModelName = boost::none); + + Type const& getType() const; + bool isNonNestedReachabilityProbability() const; + bool isNonNestedExpectedRewardFormula() const; + bool isUnsupported() const; + StateSet const& getTargetStates() const; + StateSet const& getSinkStates() const; // Shall not be called for reward formulas + std::string const& getRewardModelName() const; // Shall not be called for probability formulas + storm::solver::OptimizationDirection const& getOptimizationDirection() const; + bool minimize() const; + bool maximize() const; + + template + void updateTargetStates(PomdpType const& pomdp, storm::storage::BitVector&& newTargetStates); + + template + void updateSinkStates(PomdpType const& pomdp, storm::storage::BitVector&& newSinkStates); + + private: + Type type; + storm::solver::OptimizationDirection optimizationDirection; + boost::optional targetStates; + boost::optional sinkStates; + boost::optional rewardModelName; + }; + + template + FormulaInformation getFormulaInformation(PomdpType const& pomdp, storm::logic::Formula const& formula); + + + } + } +} diff --git a/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp new file mode 100644 index 000000000..675c91aab --- /dev/null +++ b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.cpp @@ -0,0 +1,205 @@ +#include "storm-pomdp/analysis/MemlessStrategySearchQualitative.h" + + +namespace storm { + namespace pomdp { + + template + void MemlessStrategySearchQualitative::initialize(uint64_t k) { + if (maxK == std::numeric_limits::max()) { + // not initialized at all. + // Create some data structures. + for(uint64_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { + actionSelectionVars.push_back(std::vector()); + actionSelectionVarExpressions.push_back(std::vector()); + statesPerObservation.push_back(std::vector()); // Consider using bitvectors instead. + } + + // Fill the states-per-observation mapping, + // declare the reachability variables, + // declare the path variables. + uint64_t stateId = 0; + for(auto obs : pomdp.getObservations()) { + pathVars.push_back(std::vector()); + for (uint64_t i = 0; i < k; ++i) { + pathVars.back().push_back(expressionManager->declareBooleanVariable("P-"+std::to_string(stateId)+"-"+std::to_string(i)).getExpression()); + } + reachVars.push_back(expressionManager->declareBooleanVariable("C-" + std::to_string(stateId))); + reachVarExpressions.push_back(reachVars.back().getExpression()); + statesPerObservation.at(obs).push_back(stateId++); + } + assert(pathVars.size() == pomdp.getNumberOfStates()); + assert(reachVars.size() == pomdp.getNumberOfStates()); + assert(reachVarExpressions.size() == pomdp.getNumberOfStates()); + + // Create the action selection variables. + uint64_t obs = 0; + for(auto const& statesForObservation : statesPerObservation) { + for (uint64_t a = 0; a < pomdp.getNumberOfChoices(statesForObservation.front()); ++a) { + std::string varName = "A-" + std::to_string(obs) + "-" + std::to_string(a); + actionSelectionVars.at(obs).push_back(expressionManager->declareBooleanVariable(varName)); + actionSelectionVarExpressions.at(obs).push_back(actionSelectionVars.at(obs).back().getExpression()); + } + ++obs; + } + } else { + assert(false); + } + + uint64_t rowindex = 0; + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + if (targetStates.get(state)) { + smtSolver->add(pathVars[state][0]); + } else { + smtSolver->add(!pathVars[state][0]); + } + + if (surelyReachSinkStates.get(state)) { + smtSolver->add(!reachVarExpressions[state]); + } else if(!targetStates.get(state)) { + std::vector>> pathsubsubexprs; + for (uint64_t j = 1; j < k; ++j) { + pathsubsubexprs.push_back(std::vector>()); + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { + pathsubsubexprs.back().push_back(std::vector()); + } + } + + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { + std::vector subexprreach; + + subexprreach.push_back(!reachVarExpressions.at(state)); + subexprreach.push_back(!actionSelectionVarExpressions.at(pomdp.getObservation(state)).at(action)); + for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { + subexprreach.push_back(reachVarExpressions.at(entries.getColumn())); + } + smtSolver->add(storm::expressions::disjunction(subexprreach)); + for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { + for (uint64_t j = 1; j < k; ++j) { + pathsubsubexprs[j - 1][action].push_back(pathVars[entries.getColumn()][j - 1]); + } + } + rowindex++; + } + smtSolver->add(storm::expressions::implies(reachVarExpressions.at(state), pathVars.at(state).back())); + + for (uint64_t j = 1; j < k; ++j) { + std::vector pathsubexprs; + + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { + pathsubexprs.push_back(actionSelectionVarExpressions.at(pomdp.getObservation(state)).at(action) && storm::expressions::disjunction(pathsubsubexprs[j - 1][action])); + } + smtSolver->add(storm::expressions::iff(pathVars[state][j], storm::expressions::disjunction(pathsubexprs))); + } + } + } + + for (auto const& actionVars : actionSelectionVarExpressions) { + smtSolver->add(storm::expressions::disjunction(actionVars)); + } + } + + template + bool MemlessStrategySearchQualitative::analyze(uint64_t k, storm::storage::BitVector const& oneOfTheseStates, storm::storage::BitVector const& allOfTheseStates) { + if (k < maxK) { + initialize(k); + } + + std::vector atLeastOneOfStates; + + for (uint64_t state : oneOfTheseStates) { + STORM_LOG_ASSERT(reachVarExpressions.size() > state, "state id " << state << " exceeds number of states (" << reachVarExpressions.size() << ")" ); + atLeastOneOfStates.push_back(reachVarExpressions[state]); + } + assert(atLeastOneOfStates.size() > 0); + smtSolver->add(storm::expressions::disjunction(atLeastOneOfStates)); + + for (uint64_t state : allOfTheseStates) { + assert(reachVarExpressions.size() > state); + smtSolver->add(reachVarExpressions[state]); + } + + std::cout << smtSolver->getSmtLibString() << std::endl; + + + std::vector> scheduler; + + while (true) { + + auto result = smtSolver->check(); + uint64_t i = 0; + + if (result == storm::solver::SmtSolver::CheckResult::Unknown) { + STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "SMT solver yielded an unexpected result"); + } else if (result == storm::solver::SmtSolver::CheckResult::Unsat) { + std::cout << std::endl << "Unsatisfiable!" << std::endl; + return false; + } + + std::cout << std::endl << "Satisfying assignment: " << std::endl << smtSolver->getModelAsValuation().toString(true) << std::endl; + auto model = smtSolver->getModel(); + std::cout << "states that are okay" << std::endl; + + + storm::storage::BitVector observations(pomdp.getNrObservations()); + storm::storage::BitVector remainingstates(pomdp.getNumberOfStates()); + for (auto rv : reachVars) { + if (model->getBooleanValue(rv)) { + std::cout << i << " " << std::endl; + observations.set(pomdp.getObservation(i)); + } else { + remainingstates.set(i); + } + //std::cout << i << ": " << model->getBooleanValue(rv) << ", "; + ++i; + } + + scheduler.clear(); + + std::vector schedulerSoFar; + uint64_t obs = 0; + for (auto const &actionSelectionVarsForObs : actionSelectionVars) { + uint64_t act = 0; + scheduler.push_back(std::set()); + for (auto const &asv : actionSelectionVarsForObs) { + if (model->getBooleanValue(asv)) { + scheduler.back().insert(act); + schedulerSoFar.push_back(actionSelectionVarExpressions[obs][act]); + } + act++; + } + obs++; + } + + std::cout << "the scheduler: " << std::endl; + for (uint64_t obs = 0; obs < scheduler.size(); ++obs) { + if (observations.get(obs)) { + std::cout << "observation: " << obs << std::endl; + std::cout << "actions:"; + for (auto act : scheduler[obs]) { + std::cout << " " << act; + } + std::cout << std::endl; + } + } + + + std::vector remainingExpressions; + for (auto index : remainingstates) { + remainingExpressions.push_back(reachVarExpressions[index]); + } + + smtSolver->push(); + // Add scheduler + smtSolver->add(storm::expressions::conjunction(schedulerSoFar)); + smtSolver->add(storm::expressions::disjunction(remainingExpressions)); + + } + + } + + + template class MemlessStrategySearchQualitative; + template class MemlessStrategySearchQualitative; + } +} diff --git a/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h new file mode 100644 index 000000000..3ddba7586 --- /dev/null +++ b/src/storm-pomdp/analysis/MemlessStrategySearchQualitative.h @@ -0,0 +1,75 @@ +#include +#include "storm/storage/expressions/Expressions.h" +#include "storm/solver/SmtSolver.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/utility/solver.h" +#include "storm/exceptions/UnexpectedException.h" + +namespace storm { +namespace pomdp { + + template + class MemlessStrategySearchQualitative { + // Implements an extension to the Chatterjee, Chmelik, Davies (AAAI-16) paper. + + + public: + MemlessStrategySearchQualitative(storm::models::sparse::Pomdp const& pomdp, + std::set const& targetObservationSet, + storm::storage::BitVector const& targetStates, + storm::storage::BitVector const& surelyReachSinkStates, + std::shared_ptr& smtSolverFactory) : + pomdp(pomdp), + targetObservations(targetObservationSet), + targetStates(targetStates), + surelyReachSinkStates(surelyReachSinkStates) { + this->expressionManager = std::make_shared(); + smtSolver = smtSolverFactory->create(*expressionManager); + + } + + void setSurelyReachSinkStates(storm::storage::BitVector const& surelyReachSink) { + surelyReachSinkStates = surelyReachSink; + } + + void analyzeForInitialStates(uint64_t k) { + analyze(k, pomdp.getInitialStates(), pomdp.getInitialStates()); + } + + void findNewStrategyForSomeState(uint64_t k) { + std::cout << surelyReachSinkStates << std::endl; + std::cout << targetStates << std::endl; + std::cout << (~surelyReachSinkStates & ~targetStates) << std::endl; + analyze(k, ~surelyReachSinkStates & ~targetStates); + + + } + + bool analyze(uint64_t k, storm::storage::BitVector const& oneOfTheseStates, storm::storage::BitVector const& allOfTheseStates = storm::storage::BitVector()); + + + private: + void initialize(uint64_t k); + + + std::unique_ptr smtSolver; + storm::models::sparse::Pomdp const& pomdp; + std::shared_ptr expressionManager; + uint64_t maxK = std::numeric_limits::max(); + + std::set targetObservations; + storm::storage::BitVector targetStates; + storm::storage::BitVector surelyReachSinkStates; + + std::vector> statesPerObservation; + std::vector> actionSelectionVarExpressions; // A_{z,a} + std::vector> actionSelectionVars; + std::vector reachVars; + std::vector reachVarExpressions; + std::vector> pathVars; + + + + }; +} +} diff --git a/src/storm-pomdp/analysis/QualitativeAnalysis.cpp b/src/storm-pomdp/analysis/QualitativeAnalysis.cpp index 8eb39e5fb..e288ed4ef 100644 --- a/src/storm-pomdp/analysis/QualitativeAnalysis.cpp +++ b/src/storm-pomdp/analysis/QualitativeAnalysis.cpp @@ -69,81 +69,9 @@ namespace storm { storm::storage::BitVector QualitativeAnalysis::analyseProb1Max(storm::logic::UntilFormula const& formula) const { // We consider the states that satisfy the formula with prob.1 under arbitrary schedulers as goal states. storm::storage::BitVector goalStates = storm::utility::graph::performProb1A(pomdp.getTransitionMatrix(), pomdp.getTransitionMatrix().getRowGroupIndices(), pomdp.getBackwardTransitions(), checkPropositionalFormula(formula.getLeftSubformula()), checkPropositionalFormula(formula.getRightSubformula())); - + STORM_LOG_TRACE("Prob1A states according to MDP: " << goalStates); // Now find a set of observations such that there is a memoryless scheduler inducing prob. 1 for each state whose observation is in the set. - storm::storage::BitVector candidateStates = goalStates | checkPropositionalFormula(formula.getLeftSubformula()); - storm::storage::BitVector candidateActions = pomdp.getTransitionMatrix().getRowFilter(candidateStates); - storm::storage::BitVector candidateObservations(pomdp.getNrObservations(), true); - - - bool converged = false; - while (!converged) { - converged = true; - - // Get the candidate states that can reach the goal with prob1 via candidate actions - storm::storage::BitVector newCandidates; - if (candidateActions.full()) { - newCandidates = storm::utility::graph::performProb1E(pomdp.getTransitionMatrix(), pomdp.getTransitionMatrix().getRowGroupIndices(), pomdp.getBackwardTransitions(), candidateStates, goalStates); - } else { - storm::storage::SparseMatrix filteredTransitions(pomdp.getTransitionMatrix().filterEntries(candidateActions)); - newCandidates = storm::utility::graph::performProb1E(filteredTransitions, filteredTransitions.getRowGroupIndices(), filteredTransitions.transpose(true), candidateStates, goalStates); - } - if (candidateStates != newCandidates) { - converged = false; - candidateStates = std::move(newCandidates); - } - - // Unselect all observations that have a non-candidate state - for (uint64_t state = candidateStates.getNextUnsetIndex(0); state < candidateStates.size(); state = candidateStates.getNextUnsetIndex(state + 1)) { - candidateObservations.set(pomdp.getObservation(state), false); - } - - // update the candidate actions to the set of actions that stay inside the candidate state set - std::vector candidateActionsPerObservation(pomdp.getNrObservations()); - for (auto const& state : candidateStates) { - auto& candidateActionsAtState = candidateActionsPerObservation[pomdp.getObservation(state)]; - if (candidateActionsAtState.size() == 0) { - candidateActionsAtState.resize(pomdp.getNumberOfChoices(state), true); - } - STORM_LOG_ASSERT(candidateActionsAtState.size() == pomdp.getNumberOfChoices(state), "State " + std::to_string(state) + " has " + std::to_string(pomdp.getNumberOfChoices(state)) + " actions, different from other with same observation (" + std::to_string(candidateActionsAtState.size()) + ")." ); - for (auto const& action : candidateActionsAtState) { - for (auto const& entry : pomdp.getTransitionMatrix().getRow(state, action)) { - if (!candidateStates.get(entry.getColumn())) { - candidateActionsAtState.set(action, false); - break; - } - } - } - } - - // Unselect all observations without such an action - for (auto const& o : candidateObservations) { - if (candidateActionsPerObservation[o].empty()) { - candidateObservations.set(o, false); - } - } - - // only keep the candidate states with a candidateObservation - for (auto const& state : candidateStates) { - if (!candidateObservations.get(pomdp.getObservation(state)) && !goalStates.get(state)) { - candidateStates.set(state, false); - converged = false; - } - } - - // Only keep the candidate actions originating from a candidateState. Also transform the representation of candidate actions - candidateActions.clear(); - for (auto const& state : candidateStates) { - uint64_t offset = pomdp.getTransitionMatrix().getRowGroupIndices()[state]; - for (auto const& action : candidateActionsPerObservation[pomdp.getObservation(state)]) { - candidateActions.set(offset + action); - } - } - } - - assert(goalStates.isSubsetOf(candidateStates)); - - return candidateStates; + return goalStates; } @@ -161,6 +89,8 @@ namespace storm { template class QualitativeAnalysis; - + + template + class QualitativeAnalysis; } } \ No newline at end of file diff --git a/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.cpp b/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.cpp new file mode 100644 index 000000000..f1ec0b8e8 --- /dev/null +++ b/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.cpp @@ -0,0 +1,186 @@ + + +#include "storm-pomdp/analysis/QualitativeStrategySearchNaive.h" + + +namespace storm { + namespace pomdp { + + template + void QualitativeStrategySearchNaive::initialize(uint64_t k) { + if (maxK == std::numeric_limits::max()) { + // not initialized at all. + // Create some data structures. + for(uint64_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { + actionSelectionVars.push_back(std::vector()); + actionSelectionVarExpressions.push_back(std::vector()); + statesPerObservation.push_back(std::vector()); // Consider using bitvectors instead. + } + + // Fill the states-per-observation mapping, + // declare the reachability variables, + // declare the path variables. + uint64_t stateId = 0; + for(auto obs : pomdp.getObservations()) { + pathVars.push_back(std::vector()); + for (uint64_t i = 0; i < k; ++i) { + pathVars.back().push_back(expressionManager->declareBooleanVariable("P-"+std::to_string(stateId)+"-"+std::to_string(i)).getExpression()); + } + reachVars.push_back(expressionManager->declareBooleanVariable("C-" + std::to_string(stateId))); + reachVarExpressions.push_back(reachVars.back().getExpression()); + statesPerObservation.at(obs).push_back(stateId++); + } + assert(pathVars.size() == pomdp.getNumberOfStates()); + + // Create the action selection variables. + uint64_t obs = 0; + for(auto const& statesForObservation : statesPerObservation) { + for (uint64_t a = 0; a < pomdp.getNumberOfChoices(statesForObservation.front()); ++a) { + std::string varName = "A-" + std::to_string(obs) + "-" + std::to_string(a); + actionSelectionVars.at(obs).push_back(expressionManager->declareBooleanVariable(varName)); + actionSelectionVarExpressions.at(obs).push_back(actionSelectionVars.at(obs).back().getExpression()); + } + ++obs; + } + } else { + assert(false); + } + + uint64_t rowindex = 0; + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + if (targetStates.get(state)) { + smtSolver->add(pathVars[state][0]); + } else { + smtSolver->add(!pathVars[state][0]); + } + + if (surelyReachSinkStates.get(state)) { + smtSolver->add(!reachVarExpressions[state]); + } else if(!targetStates.get(state)) { + std::vector>> pathsubsubexprs; + for (uint64_t j = 1; j < k; ++j) { + pathsubsubexprs.push_back(std::vector>()); + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { + pathsubsubexprs.back().push_back(std::vector()); + } + } + + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { + std::vector subexprreach; + + subexprreach.push_back(!reachVarExpressions.at(state)); + subexprreach.push_back(!actionSelectionVarExpressions.at(pomdp.getObservation(state)).at(action)); + for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { + subexprreach.push_back(reachVarExpressions.at(entries.getColumn())); + } + smtSolver->add(storm::expressions::disjunction(subexprreach)); + for (auto const &entries : pomdp.getTransitionMatrix().getRow(rowindex)) { + for (uint64_t j = 1; j < k; ++j) { + pathsubsubexprs[j - 1][action].push_back(pathVars[entries.getColumn()][j - 1]); + } + } + rowindex++; + } + smtSolver->add(storm::expressions::implies(reachVarExpressions.at(state), pathVars.at(state).back())); + + for (uint64_t j = 1; j < k; ++j) { + std::vector pathsubexprs; + + for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) { + pathsubexprs.push_back(actionSelectionVarExpressions.at(pomdp.getObservation(state)).at(action) && storm::expressions::disjunction(pathsubsubexprs[j - 1][action])); + } + smtSolver->add(storm::expressions::iff(pathVars[state][j], storm::expressions::disjunction(pathsubexprs))); + } + } + } + + for (auto const& actionVars : actionSelectionVarExpressions) { + smtSolver->add(storm::expressions::disjunction(actionVars)); + } + } + + template + bool QualitativeStrategySearchNaive::analyze(uint64_t k, storm::storage::BitVector const& oneOfTheseStates, storm::storage::BitVector const& allOfTheseStates) { + if (k < maxK) { + initialize(k); + } + + std::vector atLeastOneOfStates; + + for(uint64_t state : oneOfTheseStates) { + atLeastOneOfStates.push_back(reachVarExpressions[state]); + } + assert(atLeastOneOfStates.size() > 0); + smtSolver->add(storm::expressions::disjunction(atLeastOneOfStates)); + + for(uint64_t state : allOfTheseStates) { + smtSolver->add(reachVarExpressions[state]); + } + + + + std::cout << smtSolver->getSmtLibString() << std::endl; + + auto result = smtSolver->check(); + uint64_t i = 0; + smtSolver->push(); + + + + if (result == storm::solver::SmtSolver::CheckResult::Unknown) { + STORM_LOG_THROW(false, storm::exceptions::UnexpectedException, "SMT solver yielded an unexpected result"); + } else if(result == storm::solver::SmtSolver::CheckResult::Unsat) { + std::cout << std::endl << "Unsatisfiable!" << std::endl; + return false; + } else { + + std::cout << std::endl << "Satisfying assignment: " << std::endl << smtSolver->getModelAsValuation().toString(true) << std::endl; + auto model = smtSolver->getModel(); + std::cout << "states that are okay" << std::endl; + storm::storage::BitVector observations(pomdp.getNrObservations()); + storm::storage::BitVector remainingstates(pomdp.getNumberOfStates()); + for (auto rv : reachVars) { + if (model->getBooleanValue(rv)) { + std::cout << i << " " << std::endl; + observations.set(pomdp.getObservation(i)); + } else { + remainingstates.set(i); + } + //std::cout << i << ": " << model->getBooleanValue(rv) << ", "; + ++i; + } + std::vector > scheduler; + for (auto const &actionSelectionVarsForObs : actionSelectionVars) { + uint64_t act = 0; + scheduler.push_back(std::set()); + for (auto const &asv : actionSelectionVarsForObs) { + if (model->getBooleanValue(asv)) { + scheduler.back().insert(act); + } + act++; + } + } + std::cout << "the scheduler: " << std::endl; + for (uint64_t obs = 0; obs < scheduler.size(); ++obs) { + if (observations.get(obs)) { + std::cout << "observation: " << obs << std::endl; + std::cout << "actions:"; + for (auto act : scheduler[obs]) { + std::cout << " " << act; + } + std::cout << std::endl; + } + } + + + return true; + } + + + + } + + template class QualitativeStrategySearchNaive; + template class QualitativeStrategySearchNaive; + } +} diff --git a/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h b/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h new file mode 100644 index 000000000..97dc0f679 --- /dev/null +++ b/src/storm-pomdp/analysis/QualitativeStrategySearchNaive.h @@ -0,0 +1,74 @@ +#include +#include "storm/storage/expressions/Expressions.h" +#include "storm/solver/SmtSolver.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/utility/solver.h" +#include "storm/exceptions/UnexpectedException.h" + +namespace storm { + namespace pomdp { + + template + class QualitativeStrategySearchNaive { + // Implements an extension to the Chatterjee, Chmelik, Davies (AAAI-16) paper. + + + public: + QualitativeStrategySearchNaive(storm::models::sparse::Pomdp const& pomdp, + std::set const& targetObservationSet, + storm::storage::BitVector const& targetStates, + storm::storage::BitVector const& surelyReachSinkStates, + std::shared_ptr& smtSolverFactory) : + pomdp(pomdp), + targetObservations(targetObservationSet), + targetStates(targetStates), + surelyReachSinkStates(surelyReachSinkStates) { + this->expressionManager = std::make_shared(); + smtSolver = smtSolverFactory->create(*expressionManager); + + } + + void setSurelyReachSinkStates(storm::storage::BitVector const& surelyReachSink) { + surelyReachSinkStates = surelyReachSink; + } + + void analyzeForInitialStates(uint64_t k) { + analyze(k, pomdp.getInitialStates(), pomdp.getInitialStates()); + } + + void findNewStrategyForSomeState(uint64_t k) { + std::cout << surelyReachSinkStates << std::endl; + std::cout << targetStates << std::endl; + std::cout << (~surelyReachSinkStates & ~targetStates) << std::endl; + analyze(k, ~surelyReachSinkStates & ~targetStates); + + + } + + bool analyze(uint64_t k, storm::storage::BitVector const& oneOfTheseStates, storm::storage::BitVector const& allOfTheseStates = storm::storage::BitVector()); + + private: + void initialize(uint64_t k); + + + std::unique_ptr smtSolver; + storm::models::sparse::Pomdp const& pomdp; + std::shared_ptr expressionManager; + uint64_t maxK = std::numeric_limits::max(); + + std::set targetObservations; + storm::storage::BitVector targetStates; + storm::storage::BitVector surelyReachSinkStates; + + std::vector> statesPerObservation; + std::vector> actionSelectionVarExpressions; // A_{z,a} + std::vector> actionSelectionVars; + std::vector reachVars; + std::vector reachVarExpressions; + std::vector> pathVars; + + + + }; + } +} diff --git a/src/storm-pomdp/analysis/UniqueObservationStates.cpp b/src/storm-pomdp/analysis/UniqueObservationStates.cpp index 5fc8e1426..e71ef162d 100644 --- a/src/storm-pomdp/analysis/UniqueObservationStates.cpp +++ b/src/storm-pomdp/analysis/UniqueObservationStates.cpp @@ -30,6 +30,8 @@ namespace storm { } template class UniqueObservationStates; - + + template + class UniqueObservationStates; } } \ No newline at end of file diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h new file mode 100644 index 000000000..32adf985c --- /dev/null +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -0,0 +1,840 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "storm-parsers/api/properties.h" +#include "storm/api/properties.h" +#include "storm/api/verification.h" + +#include "storm/storage/BitVector.h" +#include "storm/storage/SparseMatrix.h" +#include "storm/utility/macros.h" +#include "storm-pomdp/storage/BeliefManager.h" +#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h" +#include "storm/utility/SignalHandler.h" +#include "storm/modelchecker/results/CheckResult.h" +#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" + + +namespace storm { + namespace builder { + template + class BeliefMdpExplorer { + public: + typedef typename PomdpType::ValueType ValueType; + typedef storm::storage::BeliefManager BeliefManagerType; + typedef typename BeliefManagerType::BeliefId BeliefId; + typedef uint64_t MdpStateType; + + enum class Status { + Uninitialized, + Exploring, + ModelFinished, + ModelChecked + }; + + BeliefMdpExplorer(std::shared_ptr beliefManager, storm::pomdp::modelchecker::TrivialPomdpValueBounds const& pomdpValueBounds) : beliefManager(beliefManager), pomdpValueBounds(pomdpValueBounds), status(Status::Uninitialized) { + // Intentionally left empty + } + BeliefMdpExplorer(BeliefMdpExplorer&& other) = default; + + BeliefManagerType const& getBeliefManager() const { + return *beliefManager; + } + + void startNewExploration(boost::optional extraTargetStateValue = boost::none, boost::optional extraBottomStateValue = boost::none) { + status = Status::Exploring; + // Reset data from potential previous explorations + mdpStateToBeliefIdMap.clear(); + beliefIdToMdpStateMap.clear(); + exploredBeliefIds.clear(); + exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); + mdpStatesToExplore.clear(); + lowerValueBounds.clear(); + upperValueBounds.clear(); + values.clear(); + exploredMdpTransitions.clear(); + exploredChoiceIndices.clear(); + mdpActionRewards.clear(); + targetStates.clear(); + truncatedStates.clear(); + delayedExplorationChoices.clear(); + optimalChoices = boost::none; + optimalChoicesReachableMdpStates = boost::none; + exploredMdp = nullptr; + internalAddRowGroupIndex(); // Mark the start of the first row group + + // Add some states with special treatment (if requested) + if (extraBottomStateValue) { + currentMdpState = getCurrentNumberOfMdpStates(); + extraBottomState = currentMdpState; + mdpStateToBeliefIdMap.push_back(beliefManager->noId()); + insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get()); + + internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one()); + internalAddRowGroupIndex(); + } else { + extraBottomState = boost::none; + } + if (extraTargetStateValue) { + currentMdpState = getCurrentNumberOfMdpStates(); + extraTargetState = currentMdpState; + mdpStateToBeliefIdMap.push_back(beliefManager->noId()); + insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get()); + + internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one()); + internalAddRowGroupIndex(); + + targetStates.grow(getCurrentNumberOfMdpStates(), false); + targetStates.set(extraTargetState.get(), true); + } else { + extraTargetState = boost::none; + } + currentMdpState = noState(); + + // Set up the initial state. + initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); + } + + /*! + * Restarts the exploration to allow re-exploring each state. + * After calling this, the "currently explored" MDP has the same number of states and choices as the "old" one, but the choices are still empty + * This method inserts the initial state of the MDP in the exploration queue. + * While re-exploring, the reference to the old MDP remains valid. + */ + void restartExploration() { + STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status."); + status = Status::Exploring; + // We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid. + exploredBeliefIds.clear(); + exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); + exploredMdpTransitions.clear(); + exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices()); + exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices(); + mdpActionRewards.clear(); + if (exploredMdp->hasRewardModel()) { + // Can be overwritten during exploration + mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector(); + } + targetStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); + truncatedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); + delayedExplorationChoices.clear(); + mdpStatesToExplore.clear(); + + // The extra states are not changed + if (extraBottomState) { + currentMdpState = extraBottomState.get(); + restoreOldBehaviorAtCurrentState(0); + } + if (extraTargetState) { + currentMdpState = extraTargetState.get(); + restoreOldBehaviorAtCurrentState(0); + targetStates.set(extraTargetState.get(), true); + } + currentMdpState = noState(); + + // Set up the initial state. + initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); + } + + bool hasUnexploredState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return !mdpStatesToExplore.empty(); + } + + BeliefId exploreNextState() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + // Mark the end of the previously explored row group. + if (currentMdpState != noState() && !currentStateHasOldBehavior()) { + internalAddRowGroupIndex(); + } + + // Pop from the queue. + currentMdpState = mdpStatesToExplore.front(); + mdpStatesToExplore.pop_front(); + + + return mdpStateToBeliefIdMap[currentMdpState]; + } + + void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero(), ValueType const& bottomStateValue = storm::utility::zero()) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; + if (!storm::utility::isZero(bottomStateValue)) { + STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); + internalAddTransition(row, extraBottomState.get(), bottomStateValue); + } + if (!storm::utility::isZero(targetStateValue)) { + STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none."); + internalAddTransition(row, extraTargetState.get(), targetStateValue); + } + } + + void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one()) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; + internalAddTransition(row, getCurrentMdpState(), value); + } + + /*! + * Adds the next transition to the given successor belief + * @param localActionIndex + * @param transitionTarget + * @param value + * @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition. + * @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true. + */ + bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); + + MdpStateType column; + if (ignoreNewBeliefs) { + column = getExploredMdpState(transitionTarget); + if (column == noState()) { + return false; + } + } else { + column = getOrAddMdpState(transitionTarget); + } + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; + internalAddTransition(row, column, value); + return true; + } + + void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero()) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) { + mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); + } + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; + mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward; + } + + void setCurrentStateIsTarget() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + targetStates.grow(getCurrentNumberOfMdpStates(), false); + targetStates.set(getCurrentMdpState(), true); + } + + void setCurrentStateIsTruncated() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + truncatedStates.grow(getCurrentNumberOfMdpStates(), false); + truncatedStates.set(getCurrentMdpState(), true); + } + + void setCurrentChoiceIsDelayed(uint64_t const& localActionIndex) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + delayedExplorationChoices.grow(getCurrentNumberOfMdpChoices(), false); + delayedExplorationChoices.set(getStartOfCurrentRowGroup() + localActionIndex, true); + } + + bool currentStateHasOldBehavior() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateHasOldBehavior' called but there is no current state."); + return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); + } + + bool getCurrentStateWasTruncated() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); + STORM_LOG_ASSERT(exploredMdp, "No 'old' mdp available"); + return exploredMdp->getStateLabeling().getStateHasLabel("truncated", getCurrentMdpState()); + } + + /*! + * Retrieves whether the current state can be reached under an optimal scheduler + * This requires a previous call of computeOptimalChoicesAndReachableMdpStates. + */ + bool stateIsOptimalSchedulerReachable(MdpStateType mdpState) const { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), "Method 'stateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); + return optimalChoicesReachableMdpStates->get(mdpState); + } + + /*! + * Retrieves whether the given action at the current state was optimal in the most recent check. + * This requires a previous call of computeOptimalChoicesAndReachableMdpStates. + */ + bool actionIsOptimal(uint64_t const& globalActionIndex) const { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + STORM_LOG_ASSERT(optimalChoices.is_initialized(), "Method 'actionIsOptimal' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); + return optimalChoices->get(globalActionIndex); + } + + /*! + * Retrieves whether the current state can be reached under a scheduler that was optimal in the most recent check. + * This requires (i) a previous call of computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior. + */ + bool currentStateIsOptimalSchedulerReachable() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateIsOptimalSchedulerReachable' called but there is no current state."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'currentStateIsOptimalSchedulerReachable' called but current state has no old behavior"); + STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.is_initialized(), "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); + return optimalChoicesReachableMdpStates->get(getCurrentMdpState()); + } + + /*! + * Retrieves whether the given action at the current state was optimal in the most recent check. + * This requires (i) a previous call of computeOptimalChoicesAndReachableMdpStates and (ii) that the current state has old behavior. + */ + bool actionAtCurrentStateWasOptimal(uint64_t const& localActionIndex) const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); + STORM_LOG_ASSERT(optimalChoices.is_initialized(), "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before."); + uint64_t choice = getStartOfCurrentRowGroup() + localActionIndex; + return optimalChoices->get(choice); + } + + bool getCurrentStateActionExplorationWasDelayed(uint64_t const& localActionIndex) const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior"); + STORM_LOG_ASSERT(exploredMdp, "No 'old' mdp available"); + uint64_t choice = exploredMdp->getNondeterministicChoiceIndices()[getCurrentMdpState()] + localActionIndex; + return exploredMdp->hasChoiceLabeling() && exploredMdp->getChoiceLabeling().getChoiceHasLabel("delayed", choice); + } + + /*! + * Inserts transitions and rewards at the given action as in the MDP of the previous exploration. + * Does NOT set whether the state is truncated and/or target. + * Will add "old" states that have not been considered before into the exploration queue + * @param localActionIndex + */ + void restoreOldBehaviorAtCurrentState(uint64_t const& localActionIndex) { + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any."); + STORM_LOG_ASSERT(localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); + + uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex; + STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index."); + + // Insert the transitions + for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) { + internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue()); + // Check whether exploration is needed + auto beliefId = getBeliefId(transition.getColumn()); + if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state + if (!exploredBeliefIds.get(beliefId)) { + // This belief needs exploration + exploredBeliefIds.set(beliefId, true); + mdpStatesToExplore.push_back(transition.getColumn()); + } + } + } + + // Actually, nothing needs to be done for rewards since we already initialize the vector with the "old" values + } + + void finishExploration() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); + + // Complete the exploration + // Finish the last row grouping in case the last explored state was new + if (!currentStateHasOldBehavior()) { + internalAddRowGroupIndex(); + } + // Resize state- and choice based vectors to the correct size + targetStates.resize(getCurrentNumberOfMdpStates(), false); + truncatedStates.resize(getCurrentNumberOfMdpStates(), false); + if (!mdpActionRewards.empty()) { + mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); + } + + // We are not exploring anymore + currentMdpState = noState(); + + // If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build). + // We get rid of these before rebuilding the model + if (exploredMdp) { + dropUnexploredStates(); + } + + // The potentially computed optimal choices and the set of states that are reachable under these choices are not valid anymore. + optimalChoices = boost::none; + optimalChoicesReachableMdpStates = boost::none; + + // Create the tranistion matrix + uint64_t entryCount = 0; + for (auto const& row : exploredMdpTransitions) { + entryCount += row.size(); + } + storm::storage::SparseMatrixBuilder builder(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), entryCount, true, true, getCurrentNumberOfMdpStates()); + for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { + uint64_t rowIndex = exploredChoiceIndices[groupIndex]; + uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; + builder.newRowGroup(rowIndex); + for (; rowIndex < groupEnd; ++rowIndex) { + for (auto const& entry : exploredMdpTransitions[rowIndex]) { + builder.addNextValue(rowIndex, entry.first, entry.second); + } + } + } + auto mdpTransitionMatrix = builder.build(); + + // Create a standard labeling + storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates()); + mdpLabeling.addLabel("init"); + mdpLabeling.addLabelToState("init", initialMdpState); + targetStates.resize(getCurrentNumberOfMdpStates(), false); + mdpLabeling.addLabel("target", std::move(targetStates)); + truncatedStates.resize(getCurrentNumberOfMdpStates(), false); + mdpLabeling.addLabel("truncated", std::move(truncatedStates)); + + // Create a standard reward model (if rewards are available) + std::unordered_map> mdpRewardModels; + if (!mdpActionRewards.empty()) { + mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); + mdpRewardModels.emplace("default", + storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(mdpActionRewards))); + } + + // Create model components + storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); + + // Potentially create a choice labeling + if (!delayedExplorationChoices.empty()) { + modelComponents.choiceLabeling = storm::models::sparse::ChoiceLabeling(getCurrentNumberOfMdpChoices()); + delayedExplorationChoices.resize(getCurrentNumberOfMdpChoices(), false); + modelComponents.choiceLabeling->addLabel("delayed", std::move(delayedExplorationChoices)); + } + + // Create the final model. + exploredMdp = std::make_shared>(std::move(modelComponents)); + status = Status::ModelFinished; + STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << " of which were flagged as truncated)."); + + } + + void dropUnexploredStates() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); + + STORM_LOG_ASSERT(exploredMdp, "Method called although no 'old' MDP is available."); + // Find the states (and corresponding choices) that were not explored. + // These correspond to "empty" MDP transitions + storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(), true), relevantMdpChoices(getCurrentNumberOfMdpChoices(), true); + std::vector toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState()); + MdpStateType nextRelevantIndex = 0; + for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { + uint64_t rowIndex = exploredChoiceIndices[groupIndex]; + // Check first row in group + if (exploredMdpTransitions[rowIndex].empty()) { + relevantMdpChoices.set(rowIndex, false); + relevantMdpStates.set(groupIndex, false); + } else { + toRelevantStateIndexMap[groupIndex] = nextRelevantIndex; + ++nextRelevantIndex; + } + uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; + // process remaining rows in group + for (++rowIndex; rowIndex < groupEnd; ++rowIndex) { + // Assert that all actions at the current state were consistently explored or unexplored. + STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex), "Actions at 'old' MDP state " << groupIndex << " were only partly explored."); + if (exploredMdpTransitions[rowIndex].empty()) { + relevantMdpChoices.set(rowIndex, false); + } + } + } + + if (relevantMdpStates.full()) { + // All states are relevant so nothing to do + return; + } + + // Translate various components to the "new" MDP state set + storm::utility::vector::filterVectorInPlace(mdpStateToBeliefIdMap, relevantMdpStates); + { // beliefIdToMdpStateMap + for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) { + if (relevantMdpStates.get(belIdToMdpStateIt->second)) { + // Translate current entry and move on to the next one. + belIdToMdpStateIt->second = toRelevantStateIndexMap[belIdToMdpStateIt->second]; + ++belIdToMdpStateIt; + } else { + STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first), "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId"); + // Delete current entry and move on to the next one. + // This works because std::map::erase does not invalidate other iterators within the map! + beliefIdToMdpStateMap.erase(belIdToMdpStateIt++); + } + } + } + { // exploredMdpTransitions + storm::utility::vector::filterVectorInPlace(exploredMdpTransitions, relevantMdpChoices); + // Adjust column indices. Unfortunately, the fastest way seems to be to "rebuild" the map + // It might payoff to do this when building the matrix. + for (auto& transitions : exploredMdpTransitions) { + std::map newTransitions; + for (auto const& entry : transitions) { + STORM_LOG_ASSERT(relevantMdpStates.get(entry.first), "Relevant state has transition to irrelevant state."); + newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second); + } + transitions = std::move(newTransitions); + } + } + { // exploredChoiceIndices + MdpStateType newState = 0; + assert(exploredChoiceIndices[0] == 0u); + // Loop invariant: all indices up to exploredChoiceIndices[newState] consider the new row indices and all other entries are not touched. + for (auto const& oldState : relevantMdpStates) { + if (oldState != newState) { + assert(oldState > newState); + uint64_t groupSize = exploredChoiceIndices[oldState + 1] - exploredChoiceIndices[oldState]; + exploredChoiceIndices[newState + 1] = exploredChoiceIndices[newState] + groupSize; + } + ++newState; + } + exploredChoiceIndices.resize(newState + 1); + } + if (!mdpActionRewards.empty()) { + storm::utility::vector::filterVectorInPlace(mdpActionRewards, relevantMdpChoices); + } + if (extraBottomState) { + extraBottomState = toRelevantStateIndexMap[extraBottomState.get()]; + } + if (extraTargetState) { + extraTargetState = toRelevantStateIndexMap[extraTargetState.get()]; + } + targetStates = targetStates % relevantMdpStates; + truncatedStates = truncatedStates % relevantMdpStates; + initialMdpState = toRelevantStateIndexMap[initialMdpState]; + + storm::utility::vector::filterVectorInPlace(lowerValueBounds, relevantMdpStates); + storm::utility::vector::filterVectorInPlace(upperValueBounds, relevantMdpStates); + storm::utility::vector::filterVectorInPlace(values, relevantMdpStates); + + } + + std::shared_ptr> getExploredMdp() const { + STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); + STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); + return exploredMdp; + } + + MdpStateType getCurrentNumberOfMdpStates() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return mdpStateToBeliefIdMap.size(); + } + + MdpStateType getCurrentNumberOfMdpChoices() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return exploredMdpTransitions.size(); + } + + MdpStateType getStartOfCurrentRowGroup() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return exploredChoiceIndices[getCurrentMdpState()]; + } + + ValueType getLowerValueBoundAtCurrentState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return lowerValueBounds[getCurrentMdpState()]; + } + + ValueType getUpperValueBoundAtCurrentState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return upperValueBounds[getCurrentMdpState()]; + } + + ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const { + STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(), "Requested lower value bounds but none were available."); + auto it = pomdpValueBounds.lower.begin(); + ValueType result = beliefManager->getWeightedSum(beliefId, *it); + for (++it; it != pomdpValueBounds.lower.end(); ++it) { + result = std::max(result, beliefManager->getWeightedSum(beliefId, *it)); + } + return result; + } + + ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const { + STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(), "Requested upper value bounds but none were available."); + auto it = pomdpValueBounds.upper.begin(); + ValueType result = beliefManager->getWeightedSum(beliefId, *it); + for (++it; it != pomdpValueBounds.upper.end(); ++it) { + result = std::min(result, beliefManager->getWeightedSum(beliefId, *it)); + } + return result; + } + + void computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) { + STORM_LOG_ASSERT(status == Status::ModelFinished, "Method call is invalid in current status."); + STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored"); + auto property = createStandardProperty(dir, exploredMdp->hasRewardModel()); + auto task = createStandardCheckTask(property); + + std::unique_ptr res(storm::api::verifyWithSparseEngine(exploredMdp, task)); + if (res) { + values = std::move(res->asExplicitQuantitativeCheckResult().getValueVector()); + STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(lowerValueBounds, values, std::less_equal()), "Computed values are smaller than the lower bound."); + STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(upperValueBounds, values, std::greater_equal()), "Computed values are larger than the upper bound."); + } else { + STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!"); + STORM_LOG_ERROR("No result obtained while checking."); + } + status = Status::ModelChecked; + } + + bool hasComputedValues() const { + return status == Status::ModelChecked; + } + + std::vector const& getValuesOfExploredMdp() const { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + return values; + } + + ValueType const& getComputedValueAtInitialState() const { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored."); + return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)]; + } + + MdpStateType getBeliefId(MdpStateType exploredMdpState) const { + STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); + return mdpStateToBeliefIdMap[exploredMdpState]; + } + + struct SuccessorObservationInformation { + SuccessorObservationInformation(ValueType const& obsProb, ValueType const& maxProb, uint64_t const& count) : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) { + // Intentionally left empty. + } + + void join(SuccessorObservationInformation other) { /// Does not join support (for performance reasons) + observationProbability += other.observationProbability; + maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs); + successorWithObsCount += other.successorWithObsCount; + } + + ValueType observationProbability; /// The probability we move to the corresponding observation. + ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation. + uint64_t successorWithObsCount; /// The number of successor beliefstates with this observation + typename BeliefManagerType::BeliefSupportType support; + }; + + void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map& gatheredSuccessorObservations) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); + uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; + gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + + } + + void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map& gatheredSuccessorObservations) { + STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before"); + for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { + auto const& beliefId = getBeliefId(entry.getColumn()); + if (beliefId != beliefManager->noId()) { + auto const& obs = beliefManager->getBeliefObservation(beliefId); + SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1); + auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info); + if (!obsInsertion.second) { + // There already is an entry for this observation, so join the two informations + obsInsertion.first->second.join(info); + } + beliefManager->joinSupport(beliefId, obsInsertion.first->second.support); + } + } + } + + bool currentStateHasSuccessorObservationInObservationSet(uint64_t localActionIndex, storm::storage::BitVector const& observationSet) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); + uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; + for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { + auto const& beliefId = getBeliefId(entry.getColumn()); + if (observationSet.get(beliefManager->getBeliefObservation(beliefId))) { + return true; + } + } + return false; + } + + void takeCurrentValuesAsUpperBounds() { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + upperValueBounds = values; + } + + void takeCurrentValuesAsLowerBounds() { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + lowerValueBounds = values; + } + + /*! + * + * Computes the set of states that are reachable via a path that is consistent with an optimal MDP scheduler. + * States that are only reachable via target states will not be in this set. + * @param ancillaryChoicesEpsilon if the difference of a 1-step value of a choice is only epsilon away from the optimal value, the choice will be included. + * @param relative if set, we consider the relative difference to detect ancillaryChoices + */ + void computeOptimalChoicesAndReachableMdpStates(ValueType const& ancillaryChoicesEpsilon, bool relativeDifference) { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + STORM_LOG_ASSERT(exploredMdp, "Method call is invalid in if no MDP is available."); + STORM_LOG_ASSERT(!optimalChoices.is_initialized(), "Tried to compute optimal scheduler but this has already been done before."); + STORM_LOG_ASSERT(!optimalChoicesReachableMdpStates.is_initialized(), "Tried to compute states that are reachable under an optimal scheduler but this has already been done before."); + + // First find the choices that are optimal + optimalChoices = storm::storage::BitVector(exploredMdp->getNumberOfChoices(), false); + auto const& choiceIndices = exploredMdp->getNondeterministicChoiceIndices(); + auto const& transitions = exploredMdp->getTransitionMatrix(); + auto const& targetStates = exploredMdp->getStates("target"); + for (uint64_t mdpState = 0; mdpState < exploredMdp->getNumberOfStates(); ++mdpState) { + if (targetStates.get(mdpState)) { + // Target states can be skipped. + continue; + } else { + auto const& stateValue = values[mdpState]; + for (uint64_t globalChoice = choiceIndices[mdpState]; globalChoice < choiceIndices[mdpState + 1]; ++globalChoice) { + ValueType choiceValue = transitions.multiplyRowWithVector(globalChoice, values); + if (exploredMdp->hasRewardModel()) { + choiceValue += exploredMdp->getUniqueRewardModel().getStateActionReward(globalChoice); + } + ValueType absDiff = storm::utility::abs((choiceValue - stateValue)); + if ((relativeDifference && absDiff <= ancillaryChoicesEpsilon * stateValue) || (!relativeDifference && absDiff <= ancillaryChoicesEpsilon)) { + optimalChoices->set(globalChoice, true); + } + } + STORM_LOG_ASSERT(optimalChoices->getNextSetIndex(choiceIndices[mdpState]) < optimalChoices->size(), "Could not find an optimal choice."); + } + } + + // Then, find the states that are reachable via these choices + optimalChoicesReachableMdpStates = storm::utility::graph::getReachableStates(transitions, exploredMdp->getInitialStates(), ~targetStates, targetStates, false, 0, optimalChoices.get()); + } + + private: + MdpStateType noState() const { + return std::numeric_limits::max(); + } + + std::shared_ptr createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) { + std::string propertyString = computeRewards ? "R" : "P"; + propertyString += storm::solver::minimize(dir) ? "min" : "max"; + propertyString += "=? [F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + return storm::api::extractFormulasFromProperties(propertyVector).front(); + } + + storm::modelchecker::CheckTask createStandardCheckTask(std::shared_ptr& property) { + //Note: The property should not run out of scope after calling this because the task only stores the property by reference. + // Therefore, this method needs the property by reference (and not const reference) + auto task = storm::api::createTask(property, false); + auto hint = storm::modelchecker::ExplicitModelCheckerHint(); + hint.setResultHint(values); + auto hintPtr = std::make_shared>(hint); + task.setHint(hintPtr); + return task; + } + + MdpStateType getCurrentMdpState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return currentMdpState; + } + + MdpStateType getCurrentBeliefId() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return getBeliefId(getCurrentMdpState()); + } + + void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) { + STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(), "Skipped at least one row."); + if (row == exploredMdpTransitions.size()) { + exploredMdpTransitions.emplace_back(); + } + STORM_LOG_ASSERT(exploredMdpTransitions[row].count(column) == 0, "Trying to insert multiple transitions to the same state."); + exploredMdpTransitions[row][column] = value; + } + + void internalAddRowGroupIndex() { + exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices()); + } + + MdpStateType getExploredMdpState(BeliefId const& beliefId) const { + if (beliefId < exploredBeliefIds.size() && exploredBeliefIds.get(beliefId)) { + return beliefIdToMdpStateMap.at(beliefId); + } else { + return noState(); + } + } + + void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) { + lowerValueBounds.push_back(lowerBound); + upperValueBounds.push_back(upperBound); + // Take the middle value as a hint + values.push_back((lowerBound + upperBound) / storm::utility::convertNumber(2)); + STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have different size then number of available states."); + STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size."); + } + + MdpStateType getOrAddMdpState(BeliefId const& beliefId) { + exploredBeliefIds.grow(beliefId + 1, false); + if (exploredBeliefIds.get(beliefId)) { + return beliefIdToMdpStateMap[beliefId]; + } else { + // This state needs exploration + exploredBeliefIds.set(beliefId, true); + + // If this is a restart of the exploration, we still might have an MDP state for the belief + if (exploredMdp) { + auto findRes = beliefIdToMdpStateMap.find(beliefId); + if (findRes != beliefIdToMdpStateMap.end()) { + mdpStatesToExplore.push_back(findRes->second); + return findRes->second; + } + } + // At this point we need to add a new MDP state + MdpStateType result = getCurrentNumberOfMdpStates(); + assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size()); + mdpStateToBeliefIdMap.push_back(beliefId); + beliefIdToMdpStateMap[beliefId] = result; + insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId)); + mdpStatesToExplore.push_back(result); + return result; + } + } + + // Belief state related information + std::shared_ptr beliefManager; + std::vector mdpStateToBeliefIdMap; + std::map beliefIdToMdpStateMap; + storm::storage::BitVector exploredBeliefIds; + + // Exploration information + std::deque mdpStatesToExplore; + std::vector> exploredMdpTransitions; + std::vector exploredChoiceIndices; + std::vector mdpActionRewards; + uint64_t currentMdpState; + + // Special states and choices during exploration + boost::optional extraTargetState; + boost::optional extraBottomState; + storm::storage::BitVector targetStates; + storm::storage::BitVector truncatedStates; + MdpStateType initialMdpState; + storm::storage::BitVector delayedExplorationChoices; + + // Final Mdp + std::shared_ptr> exploredMdp; + + // Value and scheduler related information + storm::pomdp::modelchecker::TrivialPomdpValueBounds pomdpValueBounds; + std::vector lowerValueBounds; + std::vector upperValueBounds; + std::vector values; // Contains an estimate during building and the actual result after a check has performed + boost::optional optimalChoices; + boost::optional optimalChoicesReachableMdpStates; + + // The current status of this explorer + Status status; + }; + } +} \ No newline at end of file diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h new file mode 100644 index 000000000..c24248467 --- /dev/null +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include "storm/utility/constants.h" +#include "storm/utility/NumberTraits.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + template + struct ApproximatePOMDPModelCheckerOptions { + ApproximatePOMDPModelCheckerOptions(bool discretize, bool unfold) : discretize(discretize), unfold(unfold) { + // Intentionally left empty + } + + bool discretize; + bool unfold; + bool refine = false; + boost::optional refineStepLimit; + ValueType refinePrecision = storm::utility::zero(); + boost::optional explorationTimeLimit; + + // Controlparameters for the refinement heuristic + // Discretization Resolution + uint64_t resolutionInit = 2; + ValueType resolutionFactor = storm::utility::convertNumber(2); + // The maximal number of newly expanded MDP states in a refinement step + uint64_t sizeThresholdInit = 0; + ValueType sizeThresholdFactor = storm::utility::convertNumber(4); + // Controls how large the gap between known lower- and upper bounds at a beliefstate needs to be in order to explore + ValueType gapThresholdInit = storm::utility::convertNumber(0.1); + ValueType gapThresholdFactor = storm::utility::convertNumber(0.25); + // Controls whether "almost optimal" choices will be considered optimal + ValueType optimalChoiceValueThresholdInit = storm::utility::convertNumber(1e-3); + ValueType optimalChoiceValueThresholdFactor = storm::utility::one(); + // Controls which observations are refined. + ValueType obsThresholdInit = storm::utility::convertNumber(0.1); + ValueType obsThresholdIncrementFactor = storm::utility::convertNumber(0.1); + + ValueType numericPrecision = storm::NumberTraits::IsExact ? storm::utility::zero() : storm::utility::convertNumber(1e-9); /// Used to decide whether two beliefs are equal + bool dynamicTriangulation = true; // Sets whether the triangulation is done in a dynamic way (yielding more precise triangulations) + }; + } + } +} diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp new file mode 100644 index 000000000..2e8619819 --- /dev/null +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -0,0 +1,839 @@ +#include "ApproximatePOMDPModelchecker.h" + +#include + +#include + +#include "storm-pomdp/analysis/FormulaInformation.h" +#include "storm-pomdp/analysis/FiniteBeliefMdpDetection.h" + +#include "storm/utility/ConstantsComparator.h" +#include "storm/utility/NumberTraits.h" +#include "storm/utility/graph.h" +#include "storm/logic/Formulas.h" + +#include "storm/models/sparse/Dtmc.h" +#include "storm/models/sparse/StandardRewardModel.h" +#include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h" +#include "storm/utility/vector.h" +#include "storm/api/properties.h" +#include "storm/api/export.h" +#include "storm-pomdp/builder/BeliefMdpExplorer.h" +#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h" + +#include "storm/utility/macros.h" +#include "storm/utility/SignalHandler.h" +#include "storm/exceptions/NotSupportedException.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + + template + ApproximatePOMDPModelchecker::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) { + // Intentionally left empty + } + + template + typename ApproximatePOMDPModelchecker::ValueType + ApproximatePOMDPModelchecker::Result::diff(bool relative) const { + ValueType diff = upperBound - lowerBound; + if (diff < storm::utility::zero()) { + STORM_LOG_WARN_COND(diff >= 1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << "."); + diff = storm::utility::zero(); + } + if (relative && !storm::utility::isZero(upperBound)) { + diff /= upperBound; + } + return diff; + } + + template + bool ApproximatePOMDPModelchecker::Result::updateLowerBound(ValueType const& value) { + if (value > lowerBound) { + lowerBound = value; + return true; + } + return false; + } + + template + bool ApproximatePOMDPModelchecker::Result::updateUpperBound(ValueType const& value) { + if (value < upperBound) { + upperBound = value; + return true; + } + return false; + } + + template + ApproximatePOMDPModelchecker::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { + // intentionally left empty; + } + + template + ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) { + cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(this->options.numericPrecision), false); + } + + template + typename ApproximatePOMDPModelchecker::Result ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { + STORM_LOG_ASSERT(options.unfold || options.discretize, "Invoked belief exploration but no task (unfold or discretize) given."); + // Reset all collected statistics + statistics = Statistics(); + statistics.totalTime.start(); + // Extract the relevant information from the formula + auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); + + // Compute some initial bounds on the values for each state of the pomdp + auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker>(pomdp).getValueBounds(formula, formulaInfo); + uint64_t initialPomdpState = pomdp.getInitialStates().getNextSetIndex(0); + Result result(initialPomdpValueBounds.getHighestLowerBound(initialPomdpState), initialPomdpValueBounds.getSmallestUpperBound(initialPomdpState)); + STORM_PRINT_AND_LOG("Initial value bounds are [" << result.lowerBound << ", " << result.upperBound << "]" << std::endl); + + boost::optional rewardModelName; + if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) { + // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. + STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); + if (formulaInfo.isNonNestedReachabilityProbability()) { + if (!formulaInfo.getSinkStates().empty()) { + auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states); + reachableFromSinkStates &= ~formulaInfo.getSinkStates().states; + STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported"); + } + } else { + // Expected reward formula! + rewardModelName = formulaInfo.getRewardModelName(); + } + } else { + STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); + } + if (storm::pomdp::detectFiniteBeliefMdp(pomdp, formulaInfo.getTargetStates().states)) { + STORM_PRINT_AND_LOG("Detected that the belief MDP is finite." << std::endl); + } + + if (options.refine) { + refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds, result); + } else { + computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds, result); + } + // "clear" results in case they were actually not requested (this will make the output a bit more clear) + if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) { + result.lowerBound = -storm::utility::infinity(); + } + if ((formulaInfo.maximize() && !options.discretize) || (formulaInfo.minimize() && !options.unfold)) { + result.upperBound = storm::utility::infinity(); + } + + if (storm::utility::resources::isTerminate()) { + statistics.aborted = true; + } + statistics.totalTime.stop(); + return result; + } + + template + void ApproximatePOMDPModelchecker::printStatisticsToStream(std::ostream& stream) const { + stream << "##### Grid Approximation Statistics ######" << std::endl; + stream << "# Input model: " << std::endl; + pomdp.printModelInformationToStream(stream); + stream << "# Max. Number of states with same observation: " << pomdp.getMaxNrStatesWithSameObservation() << std::endl; + + if (statistics.aborted) { + stream << "# Computation aborted early" << std::endl; + } + + stream << "# Total check time: " << statistics.totalTime << std::endl; + + // Refinement information: + if (statistics.refinementSteps) { + stream << "# Number of refinement steps: " << statistics.refinementSteps.get() << std::endl; + } + + // The overapproximation MDP: + if (statistics.overApproximationStates) { + stream << "# Number of states in the "; + if (options.refine) { + stream << "final "; + } + stream << "grid MDP for the over-approximation: "; + if (statistics.overApproximationBuildAborted) { + stream << ">="; + } + stream << statistics.overApproximationStates.get() << std::endl; + stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.get() << std::endl; + stream << "# Time spend for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << std::endl; + stream << "# Time spend for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << std::endl; + } + + // The underapproximation MDP: + if (statistics.underApproximationStates) { + stream << "# Number of states in the "; + if (options.refine) { + stream << "final "; + } + stream << "grid MDP for the under-approximation: "; + if (statistics.underApproximationBuildAborted) { + stream << ">="; + } + stream << statistics.underApproximationStates.get() << std::endl; + if (statistics.underApproximationStateLimit) { + stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.get() << std::endl; + } + stream << "# Time spend for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << std::endl; + stream << "# Time spend for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << std::endl; + } + + stream << "##########################################" << std::endl; + } + + template + void ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds const& pomdpValueBounds, Result& result) { + + if (options.discretize) { + std::vector observationResolutionVector(pomdp.getNrObservations(), storm::utility::convertNumber(options.resolutionInit)); + auto manager = std::make_shared(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); + if (rewardModelName) { + manager->setRewardModel(rewardModelName); + } + auto approx = std::make_shared(manager, pomdpValueBounds); + HeuristicParameters heuristicParameters; + heuristicParameters.gapThreshold = options.gapThresholdInit; + heuristicParameters.observationThreshold = options.obsThresholdInit; // Actually not relevant without refinement + heuristicParameters.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits::max() : options.sizeThresholdInit; + heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; + + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, observationResolutionVector, manager, approx); + if (approx->hasComputedValues()) { + STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n"); + approx->getExploredMdp()->printModelInformationToStream(std::cout); + ValueType& resultValue = min ? result.lowerBound : result.upperBound; + resultValue = approx->getComputedValueAtInitialState(); + } + } + if (options.unfold) { // Underapproximation (uses a fresh Belief manager) + auto manager = std::make_shared(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); + if (rewardModelName) { + manager->setRewardModel(rewardModelName); + } + auto approx = std::make_shared(manager, pomdpValueBounds); + HeuristicParameters heuristicParameters; + heuristicParameters.gapThreshold = options.gapThresholdInit; + heuristicParameters.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; + heuristicParameters.sizeThreshold = options.sizeThresholdInit; + if (heuristicParameters.sizeThreshold == 0) { + if (options.explorationTimeLimit) { + heuristicParameters.sizeThreshold = std::numeric_limits::max(); + } else { + heuristicParameters.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); + STORM_PRINT_AND_LOG("Heuristically selected an under-approximation mdp size threshold of " << heuristicParameters.sizeThreshold << "." << std::endl); + } + } + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, heuristicParameters, manager, approx); + if (approx->hasComputedValues()) { + STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); + approx->getExploredMdp()->printModelInformationToStream(std::cout); + ValueType& resultValue = min ? result.upperBound : result.lowerBound; + resultValue = approx->getComputedValueAtInitialState(); + } + } + } + + template + void ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds const& pomdpValueBounds, Result& result) { + statistics.refinementSteps = 0; + + // Set up exploration data + std::vector observationResolutionVector; + std::shared_ptr overApproxBeliefManager; + std::shared_ptr overApproximation; + HeuristicParameters overApproxHeuristicPar; + if (options.discretize) { // Setup and build first OverApproximation + observationResolutionVector = std::vector(pomdp.getNrObservations(), storm::utility::convertNumber(options.resolutionInit)); + overApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); + if (rewardModelName) { + overApproxBeliefManager->setRewardModel(rewardModelName); + } + overApproximation = std::make_shared(overApproxBeliefManager, pomdpValueBounds); + overApproxHeuristicPar.gapThreshold = options.gapThresholdInit; + overApproxHeuristicPar.observationThreshold = options.obsThresholdInit; + overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits::max() : options.sizeThresholdInit; + overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (!overApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) { + return; + } + ValueType const& newValue = overApproximation->getComputedValueAtInitialState(); + bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue); + if (betterBound) { + STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl); + } + } + + std::shared_ptr underApproxBeliefManager; + std::shared_ptr underApproximation; + HeuristicParameters underApproxHeuristicPar; + if (options.unfold) { // Setup and build first UnderApproximation + underApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision, options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static); + if (rewardModelName) { + underApproxBeliefManager->setRewardModel(rewardModelName); + } + underApproximation = std::make_shared(underApproxBeliefManager, pomdpValueBounds); + underApproxHeuristicPar.gapThreshold = options.gapThresholdInit; + underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit; + underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit; + if (underApproxHeuristicPar.sizeThreshold == 0) { + // Select a decent value automatically + underApproxHeuristicPar.sizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); + } + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), false, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); + if (!underApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) { + return; + } + ValueType const& newValue = underApproximation->getComputedValueAtInitialState(); + bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue); + if (betterBound) { + STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " seconds in refinement step #" << statistics.refinementSteps.get() << ". New value is '" << newValue << "'." << std::endl); + } + } + + // Do some output + STORM_PRINT_AND_LOG("Completed iteration #" << statistics.refinementSteps.get() << ". Current checktime is " << statistics.totalTime << "."); + bool computingLowerBound = false; + bool computingUpperBound = false; + if (options.discretize) { + STORM_PRINT_AND_LOG(" Over-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << "."); + (min ? computingLowerBound : computingUpperBound) = true; + } + if (options.unfold) { + STORM_PRINT_AND_LOG(" Under-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << "."); + (min ? computingUpperBound : computingLowerBound) = true; + } + if (computingLowerBound && computingUpperBound) { + STORM_PRINT_AND_LOG(" Current result is [" << result.lowerBound << ", " << result.upperBound << "]."); + } else if (computingLowerBound) { + STORM_PRINT_AND_LOG(" Current result is ≥" << result.lowerBound << "."); + } else if (computingUpperBound) { + STORM_PRINT_AND_LOG(" Current result is ≤" << result.upperBound << "."); + } + STORM_PRINT_AND_LOG(std::endl); + + // Start refinement + STORM_LOG_WARN_COND(options.refineStepLimit.is_initialized() || !storm::utility::isZero(options.refinePrecision), "No termination criterion for refinement given. Consider to specify a steplimit, a non-zero precisionlimit, or a timeout"); + STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize), "Refinement goal precision is given, but only one bound is going to be refined."); + while ((!options.refineStepLimit.is_initialized() || statistics.refinementSteps.get() < options.refineStepLimit.get()) && result.diff() > options.refinePrecision) { + bool overApproxFixPoint = true; + bool underApproxFixPoint = true; + if (options.discretize) { + // Refine over-approximation + if (min) { + overApproximation->takeCurrentValuesAsLowerBounds(); + } else { + overApproximation->takeCurrentValuesAsUpperBounds(); + } + overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor; + overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor); + overApproxHeuristicPar.observationThreshold += options.obsThresholdIncrementFactor * (storm::utility::one() - overApproxHeuristicPar.observationThreshold); + overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; + overApproxFixPoint = buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, overApproxHeuristicPar, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (overApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) { + ValueType const& newValue = overApproximation->getComputedValueAtInitialState(); + bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue); + if (betterBound) { + STORM_PRINT_AND_LOG("Over-approx result for refinement improved after " << statistics.totalTime << " in refinement step #" << (statistics.refinementSteps.get() + 1) << ". New value is '" << newValue << "'." << std::endl); + } + } else { + break; + } + } + + if (options.unfold && result.diff() > options.refinePrecision) { + // Refine under-approximation + underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor; + underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(underApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor); + underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor; + underApproxFixPoint = buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), true, underApproxHeuristicPar, underApproxBeliefManager, underApproximation); + if (underApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) { + ValueType const& newValue = underApproximation->getComputedValueAtInitialState(); + bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue); + if (betterBound) { + STORM_PRINT_AND_LOG("Under-approx result for refinement improved after " << statistics.totalTime << " in refinement step #" << (statistics.refinementSteps.get() + 1) << ". New value is '" << newValue << "'." << std::endl); + } + } else { + break; + } + } + + if (storm::utility::resources::isTerminate()) { + break; + } else { + ++statistics.refinementSteps.get(); + // Don't make too many outputs (to avoid logfile clutter) + if (statistics.refinementSteps.get() <= 1000) { + STORM_PRINT_AND_LOG("Completed iteration #" << statistics.refinementSteps.get() << ". Current checktime is " << statistics.totalTime << "."); + bool computingLowerBound = false; + bool computingUpperBound = false; + if (options.discretize) { + STORM_PRINT_AND_LOG(" Over-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << "."); + (min ? computingLowerBound : computingUpperBound) = true; + } + if (options.unfold) { + STORM_PRINT_AND_LOG(" Under-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << "."); + (min ? computingUpperBound : computingLowerBound) = true; + } + if (computingLowerBound && computingUpperBound) { + STORM_PRINT_AND_LOG(" Current result is [" << result.lowerBound << ", " << result.upperBound << "]."); + } else if (computingLowerBound) { + STORM_PRINT_AND_LOG(" Current result is ≥" << result.lowerBound << "."); + } else if (computingUpperBound) { + STORM_PRINT_AND_LOG(" Current result is ≤" << result.upperBound << "."); + } + STORM_PRINT_AND_LOG(std::endl); + STORM_LOG_WARN_COND(statistics.refinementSteps.get() < 1000, "Refinement requires more than 1000 iterations."); + } + } + if (overApproxFixPoint && underApproxFixPoint) { + STORM_PRINT_AND_LOG("Refinement fixpoint reached after " << statistics.refinementSteps.get() << " iterations." << std::endl); + break; + } + } + } + + /*! + * Heuristically rates the quality of the approximation described by the given successor observation info. + * Here, 0 means a bad approximation and 1 means a good approximation. + */ + template + BeliefValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution) { + auto n = storm::utility::convertNumber(info.support.size()); + auto one = storm::utility::one(); + if (storm::utility::isOne(n)) { + // If the belief is Dirac, it has to be approximated precisely. + // In this case, we return the best possible rating + return one; + } else { + // Create the rating for this observation at this choice from the given info + BeliefValueType obsChoiceRating = storm::utility::convertNumber(info.maxProbabilityToSuccessorWithObs / info.observationProbability); + // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 + // Normalize the rating so that it ranges from 0 to 1, where + // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. + obsChoiceRating = (obsChoiceRating * n - one) / (n - one); + // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined) + obsChoiceRating *= observationResolution / maxResolution; + return obsChoiceRating; + } + } + + template + std::vector ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector) { + uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates(); + auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices(); + BeliefValueType maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); + + std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); + + std::map gatheredSuccessorObservations; // Declare here to avoid reallocations + for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) { + // Check whether this state is reached under an optimal scheduler. + // The heuristic assumes that the remaining states are not relevant for the observation score. + if (overApproximation->stateIsOptimalSchedulerReachable(mdpState)) { + for (uint64_t mdpChoice = choiceIndices[mdpState]; mdpChoice < choiceIndices[mdpState + 1]; ++mdpChoice) { + // Similarly, only optimal actions are relevant + if (overApproximation->actionIsOptimal(mdpChoice)) { + // score the observations for this choice + gatheredSuccessorObservations.clear(); + overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + for (auto const& obsInfo : gatheredSuccessorObservations) { + auto const& obs = obsInfo.first; + BeliefValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution); + + // The rating of the observation will be the minimum over all choice-based observation ratings + resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating); + } + } + } + } + } + return resultingRatings; + } + + template + ValueType getGap(ValueType const& l, ValueType const& u) { + STORM_LOG_ASSERT(l >= storm::utility::zero() && u >= storm::utility::zero(), "Gap computation currently does not handle negative values."); + if (storm::utility::isInfinity(u)) { + if (storm::utility::isInfinity(l)) { + return storm::utility::zero(); + } else { + return u; + } + } else if (storm::utility::isZero(u)) { + STORM_LOG_ASSERT(storm::utility::isZero(l), "Upper bound is zero but lower bound is " << l << "."); + return u; + } else { + STORM_LOG_ASSERT(!storm::utility::isInfinity(l), "Lower bound is infinity, but upper bound is " << u << "."); + // get the relative gap + return storm::utility::abs(u-l) * storm::utility::convertNumber(2) / (l+u); + } + } + + template + bool ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + + // Detect whether the refinement reached a fixpoint. + bool fixPoint = true; + + statistics.overApproximationBuildTime.start(); + storm::storage::BitVector refinedObservations; + if (!refine) { + // If we build the model from scratch, we first have to setup the explorer for the overApproximation. + if (computeRewards) { + overApproximation->startNewExploration(storm::utility::zero()); + } else { + overApproximation->startNewExploration(storm::utility::one(), storm::utility::zero()); + } + } else { + // If we refine the existing overApproximation, our heuristic also wants to know which states are reachable under an optimal policy + overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true); + // We also need to find out which observation resolutions needs refinement. + // current maximal resolution (needed for refinement heuristic) + auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector); + // If there is a score < 1, we have not reached a fixpoint, yet + BeliefValueType numericPresicion = storm::utility::convertNumber(options.numericPrecision); + if (std::any_of(obsRatings.begin(), obsRatings.end(), [&numericPresicion](ValueType const& value){return value + numericPresicion < storm::utility::one();})) { + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because there are still observations to refine."); + fixPoint = false; + } + refinedObservations = storm::utility::vector::filter(obsRatings, [&heuristicParameters](ValueType const& r) { return r <= heuristicParameters.observationThreshold;}); + STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); + for (auto const& obs : refinedObservations) { + // Increment the resolution at the refined observations. + // Use storm's rational number to detect overflows properly. + storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber(observationResolutionVector[obs]) * storm::utility::convertNumber(options.resolutionFactor); + static_assert(storm::NumberTraits::IsExact || std::is_same::value, "Unhandled belief value type"); + if (!storm::NumberTraits::IsExact && newObsResolutionAsRational > storm::utility::convertNumber(std::numeric_limits::max())) { + observationResolutionVector[obs] = storm::utility::convertNumber(std::numeric_limits::max()); + } else { + observationResolutionVector[obs] = storm::utility::convertNumber(newObsResolutionAsRational); + } + } + overApproximation->restartExploration(); + } + statistics.overApproximationMaxResolution = storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end())); + + // Start exploration + storm::utility::Stopwatch explorationTime; + if (options.explorationTimeLimit) { + explorationTime.start(); + } + bool timeLimitExceeded = false; + std::map gatheredSuccessorObservations; // Declare here to avoid reallocations + uint64_t numRewiredOrExploredStates = 0; + while (overApproximation->hasUnexploredState()) { + if (!timeLimitExceeded && options.explorationTimeLimit && static_cast(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) { + STORM_LOG_INFO("Exploration time limit exceeded."); + timeLimitExceeded = true; + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because the exploration time limit is exceeded."); + fixPoint = false; + } + + uint64_t currId = overApproximation->exploreNextState(); + bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior(); + if (!hasOldBehavior) { + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a new state is explored"); + fixPoint = false; // Exploring a new state! + } + uint32_t currObservation = beliefManager->getBeliefObservation(currId); + if (targetObservations.count(currObservation) != 0) { + overApproximation->setCurrentStateIsTarget(); + overApproximation->addSelfloopTransition(); + } else { + // We need to decide how to treat this state (and each individual enabled action). There are the following cases: + // 1 The state has no old behavior and + // 1.1 we explore all actions or + // 1.2 we truncate all actions + // 2 The state has old behavior and was truncated in the last iteration and + // 2.1 we explore all actions or + // 2.2 we truncate all actions (essentially restoring old behavior, but we do the truncation step again to benefit from updated bounds) + // 3 The state has old behavior and was not truncated in the last iteration and the current action + // 3.1 should be rewired or + // 3.2 should get the old behavior but either + // 3.2.1 none of the successor observation has been refined since the last rewiring or exploration of this action + // 3.2.2 rewiring is only delayed as it could still have an effect in a later refinement step + + // Find out in which case we are + bool exploreAllActions = false; + bool truncateAllActions = false; + bool restoreAllActions = false; + bool checkRewireForAllActions = false; + // Get the relative gap + ValueType gap = getGap(overApproximation->getLowerValueBoundAtCurrentState(), overApproximation->getUpperValueBoundAtCurrentState()); + if (!hasOldBehavior) { + // Case 1 + // If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that reaches this state + if (!timeLimitExceeded && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { + exploreAllActions = true; // Case 1.1 + } else { + truncateAllActions = true; // Case 1.2 + overApproximation->setCurrentStateIsTruncated(); + } + } else { + if (overApproximation->getCurrentStateWasTruncated()) { + // Case 2 + if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { + exploreAllActions = true; // Case 2.1 + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a previously truncated state is now explored."); + fixPoint = false; + } else { + truncateAllActions = true; // Case 2.2 + overApproximation->setCurrentStateIsTruncated(); + if (fixPoint) { + // Properly check whether this can still be a fixpoint + if (overApproximation->currentStateIsOptimalSchedulerReachable() && !storm::utility::isZero(gap)) { + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we truncate a state with non-zero gap " << gap << " that is reachable via an optimal sched."); + fixPoint = false; + } + //} else { + // In this case we truncated a state that is not reachable under optimal schedulers. + // If no other state is explored (i.e. fixPoint remaints true), these states should still not be reachable in subsequent iterations + } + } + } else { + // Case 3 + // The decision for rewiring also depends on the corresponding action, but we have some criteria that lead to case 3.2 (independent of the action) + if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) { + checkRewireForAllActions = true; // Case 3.1 or Case 3.2 + } else { + restoreAllActions = true; // Definitely Case 3.2 + // We still need to check for each action whether rewiring makes sense later + checkRewireForAllActions = true; + } + } + } + bool expandedAtLeastOneAction = false; + for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { + bool expandCurrentAction = exploreAllActions || truncateAllActions; + if (checkRewireForAllActions) { + assert(refine); + // In this case, we still need to check whether this action needs to be expanded + assert(!expandCurrentAction); + // Check the action dependent conditions for rewiring + // First, check whether this action has been rewired since the last refinement of one of the successor observations (i.e. whether rewiring would actually change the successor states) + assert(overApproximation->currentStateHasOldBehavior()); + if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) || overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) { + // Then, check whether the other criteria for rewiring are satisfied + if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) { + // Do the rewiring now! (Case 3.1) + expandCurrentAction = true; + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we rewire a state."); + fixPoint = false; + } else { + // Delay the rewiring (Case 3.2.2) + overApproximation->setCurrentChoiceIsDelayed(action); + if (fixPoint) { + // Check whether this delay means that a fixpoint has not been reached + if (!overApproximation->getCurrentStateActionExplorationWasDelayed(action) || (overApproximation->currentStateIsOptimalSchedulerReachable() && overApproximation->actionAtCurrentStateWasOptimal(action) && !storm::utility::isZero(gap))) { + STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we delay a rewiring of a state with non-zero gap " << gap << " that is reachable via an optimal scheduler."); + fixPoint = false; + } + } + } + } // else { Case 3.2.1 } + } + + if (expandCurrentAction) { + expandedAtLeastOneAction = true; + if (!truncateAllActions) { + // Cases 1.1, 2.1, or 3.1 + auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); + for (auto const& successor : successorGridPoints) { + overApproximation->addTransitionToBelief(action, successor.first, successor.second, false); + } + if (computeRewards) { + overApproximation->computeRewardAtCurrentState(action); + } + } else { + // Cases 1.2 or 2.2 + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); + for (auto const& successor : successorGridPoints) { + bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, true); + if (!added) { + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first) : overApproximation->computeUpperValueBoundAtBelief(successor.first)); + } + } + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + overApproximation->addTransitionsToExtraStates(action, truncationProbability); + overApproximation->computeRewardAtCurrentState(action, truncationValueBound); + } else { + overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + } + } + } else { + // Case 3.2 + overApproximation->restoreOldBehaviorAtCurrentState(action); + } + } + if (expandedAtLeastOneAction) { + ++numRewiredOrExploredStates; + } + } + + if (storm::utility::resources::isTerminate()) { + break; + } + } + + if (storm::utility::resources::isTerminate()) { + // don't overwrite statistics of a previous, successful computation + if (!statistics.overApproximationStates) { + statistics.overApproximationBuildAborted = true; + statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); + } + statistics.overApproximationBuildTime.stop(); + return false; + } + + overApproximation->finishExploration(); + statistics.overApproximationBuildTime.stop(); + + statistics.overApproximationCheckTime.start(); + overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); + statistics.overApproximationCheckTime.stop(); + + // don't overwrite statistics of a previous, successful computation + if (!storm::utility::resources::isTerminate() || !statistics.overApproximationStates) { + statistics.overApproximationStates = overApproximation->getExploredMdp()->getNumberOfStates(); + } + return fixPoint; + } + + template + bool ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { + statistics.underApproximationBuildTime.start(); + bool fixPoint = true; + if (heuristicParameters.sizeThreshold != std::numeric_limits::max()) { + statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold; + } + if (!refine) { + // Build a new under approximation + if (computeRewards) { + underApproximation->startNewExploration(storm::utility::zero()); + } else { + underApproximation->startNewExploration(storm::utility::one(), storm::utility::zero()); + } + } else { + // Restart the building process + underApproximation->restartExploration(); + } + + // Expand the beliefs + storm::utility::Stopwatch explorationTime; + if (options.explorationTimeLimit) { + explorationTime.start(); + } + bool timeLimitExceeded = false; + while (underApproximation->hasUnexploredState()) { + if (!timeLimitExceeded && options.explorationTimeLimit && static_cast(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit.get()) { + STORM_LOG_INFO("Exploration time limit exceeded."); + timeLimitExceeded = true; + } + uint64_t currId = underApproximation->exploreNextState(); + + uint32_t currObservation = beliefManager->getBeliefObservation(currId); + bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated(); + if (!stateAlreadyExplored || timeLimitExceeded) { + fixPoint = false; + } + if (targetObservations.count(currObservation) != 0) { + underApproximation->setCurrentStateIsTarget(); + underApproximation->addSelfloopTransition(); + } else { + bool stopExploration = false; + if (timeLimitExceeded) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); + } else if (!stateAlreadyExplored) { + // Check whether we want to explore the state now! + ValueType gap = getGap(underApproximation->getLowerValueBoundAtCurrentState(), underApproximation->getUpperValueBoundAtCurrentState()); + if (gap < heuristicParameters.gapThreshold) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); + } else if (underApproximation->getCurrentNumberOfMdpStates() >= heuristicParameters.sizeThreshold) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); + } + } + for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { + // Always restore old behavior if available + if (stateAlreadyExplored) { + underApproximation->restoreOldBehaviorAtCurrentState(action); + } else { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successors = beliefManager->expand(currId, action); + for (auto const& successor : successors) { + bool added = underApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + // Some care has to be taken here: Essentially, we are triangulating a value for the under-approximation out of other + // under-approximation values. In general, this does not yield a sound underapproximation anymore. + // However, in our case this is still the case as the under-approximation values are based on a memoryless scheduler. + truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first)); + } + } + if (stopExploration) { + if (computeRewards) { + underApproximation->addTransitionsToExtraStates(action, truncationProbability); + } else { + underApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + } + } + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + underApproximation->computeRewardAtCurrentState(action, truncationValueBound); + } + } + } + } + if (storm::utility::resources::isTerminate()) { + break; + } + } + + if (storm::utility::resources::isTerminate()) { + // don't overwrite statistics of a previous, successful computation + if (!statistics.underApproximationStates) { + statistics.underApproximationBuildAborted = true; + statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); + } + statistics.underApproximationBuildTime.stop(); + return false; + } + + underApproximation->finishExploration(); + statistics.underApproximationBuildTime.stop(); + + statistics.underApproximationCheckTime.start(); + underApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); + statistics.underApproximationCheckTime.stop(); + + // don't overwrite statistics of a previous, successful computation + if (!storm::utility::resources::isTerminate() || !statistics.underApproximationStates) { + statistics.underApproximationStates = underApproximation->getExploredMdp()->getNumberOfStates(); + } + return fixPoint; + + } + + template class ApproximatePOMDPModelchecker>; + template class ApproximatePOMDPModelchecker>; + + } + } +} diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h new file mode 100644 index 000000000..f5904e0dc --- /dev/null +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -0,0 +1,121 @@ +#include "storm/api/storm.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/utility/logging.h" +#include "storm-pomdp/storage/Belief.h" +#include "storm-pomdp/storage/BeliefManager.h" +#include "storm-pomdp/modelchecker/ApproximatePOMDPModelCheckerOptions.h" +#include "storm-pomdp/builder/BeliefMdpExplorer.h" + +#include "storm/storage/jani/Property.h" + +namespace storm { + namespace logic { + class Formula; + } + + namespace pomdp { + namespace modelchecker { + + template + struct TrivialPomdpValueBounds; + + template + class ApproximatePOMDPModelchecker { + public: + typedef typename PomdpModelType::ValueType ValueType; + typedef typename PomdpModelType::RewardModelType RewardModelType; + typedef storm::storage::BeliefManager BeliefManagerType; + typedef storm::builder::BeliefMdpExplorer ExplorerType; + typedef ApproximatePOMDPModelCheckerOptions Options; + + struct Result { + Result(ValueType lower, ValueType upper); + ValueType lowerBound; + ValueType upperBound; + ValueType diff (bool relative = false) const; + bool updateLowerBound(ValueType const& value); + bool updateUpperBound(ValueType const& value); + }; + + ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options = Options()); + + Result check(storm::logic::Formula const& formula); + + void printStatisticsToStream(std::ostream& stream) const; + + private: + /** + * Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size + * + * @param targetObservations set of target observations + * @param min true if minimum value is to be computed + * @param computeRewards true if rewards are to be computed, false if probability is computed + * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value + * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value + * @param maxUaModelSize the maximum size of the underapproximation model to be generated + * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values + */ + void computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds const& pomdpValueBounds, Result& result); + + + /** + * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop + * + * @param targetObservations the set of observations to be reached + * @param min true if minimum probability is to be computed + * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values + */ + void refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, storm::pomdp::modelchecker::TrivialPomdpValueBounds const& pomdpValueBounds, Result& result); + + struct HeuristicParameters { + ValueType gapThreshold; + ValueType observationThreshold; + uint64_t sizeThreshold; + ValueType optimalChoiceValueEpsilon; + }; + + /** + * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties + * Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp) + */ + bool buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); + + /** + * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties + * Returns true if a fixpoint for the refinement has been detected (i.e. if further refinement steps would not change the mdp) + */ + bool buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, HeuristicParameters const& heuristicParameters, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); + + BeliefValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution); + + std::vector getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector); + + struct Statistics { + Statistics(); + boost::optional refinementSteps; + storm::utility::Stopwatch totalTime; + + boost::optional overApproximationStates; + bool overApproximationBuildAborted; + storm::utility::Stopwatch overApproximationBuildTime; + storm::utility::Stopwatch overApproximationCheckTime; + boost::optional overApproximationMaxResolution; + + boost::optional underApproximationStates; + bool underApproximationBuildAborted; + storm::utility::Stopwatch underApproximationBuildTime; + storm::utility::Stopwatch underApproximationCheckTime; + boost::optional underApproximationStateLimit; + + bool aborted; + }; + Statistics statistics; + + PomdpModelType const& pomdp; + Options options; + storm::utility::ConstantsComparator cc; + }; + + } + } +} \ No newline at end of file diff --git a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h new file mode 100644 index 000000000..0120f0817 --- /dev/null +++ b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h @@ -0,0 +1,207 @@ +#pragma once + +#include "storm-pomdp/analysis/FormulaInformation.h" + +#include "storm/api/verification.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/models/sparse/StandardRewardModel.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/storage/Scheduler.h" + +#include "storm/utility/macros.h" +#include "storm/exceptions/UnexpectedException.h" +#include "storm/exceptions/NotSupportedException.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + + template + struct TrivialPomdpValueBounds { + std::vector> lower; + std::vector> upper; + ValueType getHighestLowerBound(uint64_t const& state) { + STORM_LOG_ASSERT(!lower.empty(), "requested a lower bound but none were available"); + auto it = lower.begin(); + ValueType result = (*it)[state]; + for (++it; it != lower.end(); ++it) { + result = std::max(result, (*it)[state]); + } + return result; + } + ValueType getSmallestUpperBound(uint64_t const& state) { + STORM_LOG_ASSERT(!upper.empty(), "requested an upper bound but none were available"); + auto it = upper.begin(); + ValueType result = (*it)[state]; + for (++it; it != upper.end(); ++it) { + result = std::min(result, (*it)[state]); + } + return result; + } + }; + + template + class TrivialPomdpValueBoundsModelChecker { + public: + typedef typename PomdpType::ValueType ValueType; + typedef TrivialPomdpValueBounds ValueBounds; + TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) { + // Intentionally left empty + } + + ValueBounds getValueBounds(storm::logic::Formula const& formula) { + return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula)); + } + + std::vector getChoiceValues(std::vector const& stateValues, std::vector* actionBasedRewards) { + std::vector choiceValues((pomdp.getNumberOfChoices())); + pomdp.getTransitionMatrix().multiplyWithVector(stateValues, choiceValues, actionBasedRewards); + return choiceValues; + } + + std::vector computeValuesForGuessedScheduler(std::vector const& stateValues, std::vector* actionBasedRewards, storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info, std::shared_ptr> underlyingMdp, ValueType const& scoreThreshold, bool relativeScore) { + // Create some positional scheduler for the POMDP + storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); + // For each state, we heuristically find a good distribution over output actions. + auto choiceValues = getChoiceValues(stateValues, actionBasedRewards); + auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); + std::vector> choiceDistributions(pomdp.getNrObservations()); + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + auto& choiceDistribution = choiceDistributions[pomdp.getObservation(state)]; + ValueType const& stateValue = stateValues[state]; + assert(stateValue >= storm::utility::zero()); + for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) { + ValueType const& choiceValue = choiceValues[choice]; + assert(choiceValue >= storm::utility::zero()); + // Rate this choice by considering the relative difference between the choice value and the (optimal) state value + // A high score shall mean that the choice is "good" + if (storm::utility::isInfinity(stateValue)) { + // For infinity states, we simply distribute uniformly. + // FIXME: This case could be handled a bit more sensible + choiceDistribution.addProbability(choice - choiceIndices[state], scoreThreshold); + } else { + ValueType choiceScore = info.minimize() ? (choiceValue - stateValue) : (stateValue - choiceValue); + if (relativeScore) { + ValueType avg = (stateValue + choiceValue) / storm::utility::convertNumber(2); + if (!storm::utility::isZero(avg)) { + choiceScore /= avg; + } + } + choiceScore = storm::utility::one() - choiceScore; + if (choiceScore >= scoreThreshold) { + choiceDistribution.addProbability(choice - choiceIndices[state], choiceScore); + } + } + + } + STORM_LOG_ASSERT(choiceDistribution.size() > 0, "Empty choice distribution."); + } + // Normalize all distributions + for (auto& choiceDistribution : choiceDistributions) { + choiceDistribution.normalize(); + } + // Set the scheduler for all states + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + pomdpScheduler.setChoice(choiceDistributions[pomdp.getObservation(state)], state); + } + STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler."); + auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false); + + auto resultPtr = storm::api::verifyWithSparseEngine(scheduledModel, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + std::vector pomdpSchedulerResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult().getValueVector()); + return pomdpSchedulerResult; + } + + ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) { + STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis."); + + // Compute the values on the fully observable MDP + // We need an actual MDP so that we can apply schedulers below. + // Also, the api call in the next line will require a copy anyway. + auto underlyingMdp = std::make_shared>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels()); + auto resultPtr = storm::api::verifyWithSparseEngine(underlyingMdp, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + std::vector fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult().getValueVector()); + + std::vector actionBasedRewards; + std::vector* actionBasedRewardsPtr = nullptr; + if (info.isNonNestedExpectedRewardFormula()) { + actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix()); + actionBasedRewardsPtr = &actionBasedRewards; + } + std::vector> guessedSchedulerValues; + + std::vector> guessParameters({{0.875,false},{0.875,true},{0.75,false},{0.75,true}}); + for (auto const& pars : guessParameters) { + guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(fullyObservableResult, actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber(pars.first), pars.second)); + } + + // compute the 'best' guess and do a few iterations on it + uint64_t bestGuess = 0; + ValueType bestGuessSum = std::accumulate(guessedSchedulerValues.front().begin(), guessedSchedulerValues.front().end(), storm::utility::zero()); + for (uint64_t guess = 1; guess < guessedSchedulerValues.size(); ++guess) { + ValueType guessSum = std::accumulate(guessedSchedulerValues[guess].begin(), guessedSchedulerValues[guess].end(), storm::utility::zero()); + if ((info.minimize() && guessSum < bestGuessSum) || (info.maximize() && guessSum > bestGuessSum)) { + bestGuess = guess; + bestGuessSum = guessSum; + } + } + guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues[bestGuess], actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber(guessParameters[bestGuess].first), guessParameters[bestGuess].second)); + guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues.back(), actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber(guessParameters[bestGuess].first), guessParameters[bestGuess].second)); + guessedSchedulerValues.push_back(computeValuesForGuessedScheduler(guessedSchedulerValues.back(), actionBasedRewardsPtr, formula, info, underlyingMdp, storm::utility::convertNumber(guessParameters[bestGuess].first), guessParameters[bestGuess].second)); + + // Check if one of the guesses is worse than one of the others (and potentially delete it) + // Avoid deleting entries during the loop to ensure that indices remain valid + storm::storage::BitVector keptGuesses(guessedSchedulerValues.size(), true); + for (uint64_t i = 0; i < guessedSchedulerValues.size() - 1; ++i) { + if (!keptGuesses.get(i)) { + continue; + } + for (uint64_t j = i + 1; j < guessedSchedulerValues.size(); ++j) { + if (!keptGuesses.get(j)) { + continue; + } + if (storm::utility::vector::compareElementWise(guessedSchedulerValues[i], guessedSchedulerValues[j], std::less_equal())) { + if (info.minimize()) { + // In this case we are guessing upper bounds (and smaller upper bounds are better) + keptGuesses.set(j, false); + } else { + // In this case we are guessing lower bounds (and larger lower bounds are better) + keptGuesses.set(i, false); + break; + } + } else if (storm::utility::vector::compareElementWise(guessedSchedulerValues[j], guessedSchedulerValues[i], std::less_equal())) { + if (info.minimize()) { + keptGuesses.set(i, false); + break; + } else { + keptGuesses.set(j, false); + } + } + } + } + std::cout << "Keeping scheduler guesses " << keptGuesses << std::endl; + storm::utility::vector::filterVectorInPlace(guessedSchedulerValues, keptGuesses); + + // Finally prepare the result + ValueBounds result; + if (info.minimize()) { + result.lower.push_back(std::move(fullyObservableResult)); + result.upper = std::move(guessedSchedulerValues); + } else { + result.lower = std::move(guessedSchedulerValues); + result.upper.push_back(std::move(fullyObservableResult)); + } + STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower.front(), result.upper.front(), std::less_equal()), "Lower bound is larger than upper bound"); + return result; + } + + private: + PomdpType const& pomdp; + }; + } + } +} \ No newline at end of file diff --git a/src/storm-pomdp/storage/Belief.h b/src/storm-pomdp/storage/Belief.h new file mode 100644 index 000000000..ed7591f1b --- /dev/null +++ b/src/storm-pomdp/storage/Belief.h @@ -0,0 +1,12 @@ +namespace storm { + namespace pomdp { + // Structure used to represent a belief + template + struct Belief { + uint64_t id; + uint32_t observation; + //TODO make this sparse? + std::map probabilities; + }; + } +} \ No newline at end of file diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h new file mode 100644 index 000000000..4f9ca445c --- /dev/null +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -0,0 +1,507 @@ +#pragma once + +#include +#include +#include +#include +#include "storm/adapters/RationalNumberAdapter.h" +#include "storm/utility/macros.h" +#include "storm/exceptions/UnexpectedException.h" + +namespace storm { + namespace storage { + + template + class BeliefManager { + public: + + typedef typename PomdpType::ValueType ValueType; + typedef boost::container::flat_map BeliefType; // iterating over this shall be ordered (for correct hash computation) + typedef boost::container::flat_set BeliefSupportType; + typedef uint64_t BeliefId; + + enum class TriangulationMode { + Static, + Dynamic + }; + + BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision, TriangulationMode const& triangulationMode) : pomdp(pomdp), cc(precision, false), triangulationMode(triangulationMode) { + beliefToIdMap.resize(pomdp.getNrObservations()); + initialBeliefId = computeInitialBelief(); + } + + void setRewardModel(boost::optional rewardModelName = boost::none) { + if (rewardModelName) { + auto const& rewardModel = pomdp.getRewardModel(rewardModelName.get()); + pomdpActionRewardVector = rewardModel.getTotalRewardVector(pomdp.getTransitionMatrix()); + } else { + setRewardModel(pomdp.getUniqueRewardModelName()); + } + } + + void unsetRewardModel() { + pomdpActionRewardVector.clear(); + } + + struct Triangulation { + std::vector gridPoints; + std::vector weights; + uint64_t size() const { + return weights.size(); + } + }; + + BeliefId noId() const { + return std::numeric_limits::max(); + } + + bool isEqual(BeliefId const& first, BeliefId const& second) const { + return isEqual(getBelief(first), getBelief(second)); + } + + std::string toString(BeliefId const& beliefId) const { + return toString(getBelief(beliefId)); + } + + + std::string toString(Triangulation const& t) const { + std::stringstream str; + str << "(\n"; + for (uint64_t i = 0; i < t.size(); ++i) { + str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n"; + } + str <<")\n"; + return str.str(); + } + + template + ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) { + ValueType result = storm::utility::zero(); + for (auto const& entry : getBelief(beliefId)) { + result += storm::utility::convertNumber(entry.second) * storm::utility::convertNumber(summands.at(entry.first)); + } + return result; + } + + + BeliefId const& getInitialBelief() const { + return initialBeliefId; + } + + ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const { + auto const& belief = getBelief(beliefId); + STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified."); + auto result = storm::utility::zero(); + auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); + for (auto const &entry : belief) { + uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex; + STORM_LOG_ASSERT(choiceIndex < choiceIndices[entry.first + 1], "Invalid local action index."); + STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index."); + result += entry.second * pomdpActionRewardVector[choiceIndex]; + } + return result; + } + + uint32_t getBeliefObservation(BeliefId beliefId) { + return getBeliefObservation(getBelief(beliefId)); + } + + uint64_t getBeliefNumberOfChoices(BeliefId beliefId) { + auto const& belief = getBelief(beliefId); + return pomdp.getNumberOfChoices(belief.begin()->first); + } + + Triangulation triangulateBelief(BeliefId beliefId, BeliefValueType resolution) { + return triangulateBelief(getBelief(beliefId), resolution); + } + + template + void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { + auto insertionRes = distr.emplace(state, value); + if (!insertionRes.second) { + insertionRes.first->second += value; + } + } + + void joinSupport(BeliefId const& beliefId, BeliefSupportType& support) { + auto const& belief = getBelief(beliefId); + for (auto const& entry : belief) { + support.insert(entry.first); + } + } + + BeliefId getNumberOfBeliefIds() const { + return beliefs.size(); + } + + std::vector> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { + return expandInternal(beliefId, actionIndex, observationResolutions); + } + + std::vector> expand(BeliefId const& beliefId, uint64_t actionIndex) { + return expandInternal(beliefId, actionIndex); + } + + private: + + BeliefType const& getBelief(BeliefId const& id) const { + STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existend belief."); + STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range."); + return beliefs[id]; + } + + BeliefId getId(BeliefType const& belief) const { + uint32_t obs = getBeliefObservation(belief); + STORM_LOG_ASSERT(obs < beliefToIdMap.size(), "Belief has unknown observation."); + auto idIt = beliefToIdMap[obs].find(belief); + STORM_LOG_ASSERT(idIt != beliefToIdMap.end(), "Unknown Belief."); + return idIt->second; + } + + std::string toString(BeliefType const& belief) const { + std::stringstream str; + str << "{ "; + bool first = true; + for (auto const& entry : belief) { + if (first) { + first = false; + } else { + str << ", "; + } + str << entry.first << ": " << entry.second; + } + str << " }"; + return str.str(); + } + + bool isEqual(BeliefType const& first, BeliefType const& second) const { + if (first.size() != second.size()) { + return false; + } + auto secondIt = second.begin(); + for (auto const& firstEntry : first) { + if (firstEntry.first != secondIt->first) { + return false; + } + if (!cc.isEqual(firstEntry.second, secondIt->second)) { + return false; + } + ++secondIt; + } + return true; + } + + bool assertBelief(BeliefType const& belief) const { + BeliefValueType sum = storm::utility::zero(); + boost::optional observation; + for (auto const& entry : belief) { + if (entry.first >= pomdp.getNumberOfStates()) { + STORM_LOG_ERROR("Belief does refer to non-existing pomdp state " << entry.first << "."); + return false; + } + uint64_t entryObservation = pomdp.getObservation(entry.first); + if (observation) { + if (observation.get() != entryObservation) { + STORM_LOG_ERROR("Beliefsupport contains different observations."); + return false; + } + } else { + observation = entryObservation; + } + // Don't use cc for these checks, because computations with zero are usually fine + if (storm::utility::isZero(entry.second)) { + // We assume that beliefs only consider their support. + STORM_LOG_ERROR("Zero belief probability."); + return false; + } + if (entry.second < storm::utility::zero()) { + STORM_LOG_ERROR("Negative belief probability."); + return false; + } + if (cc.isLess(storm::utility::one(), entry.second)) { + STORM_LOG_ERROR("Belief probability greater than one."); + return false; + } + sum += entry.second; + } + if (!cc.isOne(sum)) { + STORM_LOG_ERROR("Belief does not sum up to one. (" << sum << " instead)."); + return false; + } + return true; + } + + bool assertTriangulation(BeliefType const& belief, Triangulation const& triangulation) const { + if (triangulation.weights.size() != triangulation.gridPoints.size()) { + STORM_LOG_ERROR("Number of weights and points in triangulation does not match."); + return false; + } + if (triangulation.size() == 0) { + STORM_LOG_ERROR("Empty triangulation."); + return false; + } + BeliefType triangulatedBelief; + BeliefValueType weightSum = storm::utility::zero(); + for (uint64_t i = 0; i < triangulation.weights.size(); ++i) { + if (cc.isZero(triangulation.weights[i])) { + STORM_LOG_ERROR("Zero weight in triangulation."); + return false; + } + if (cc.isLess(triangulation.weights[i], storm::utility::zero())) { + STORM_LOG_ERROR("Negative weight in triangulation."); + return false; + } + if (cc.isLess(storm::utility::one(), triangulation.weights[i])) { + STORM_LOG_ERROR("Weight greater than one in triangulation."); + } + weightSum += triangulation.weights[i]; + BeliefType const& gridPoint = getBelief(triangulation.gridPoints[i]); + for (auto const& pointEntry : gridPoint) { + BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero()).first->second; + triangulatedValue += triangulation.weights[i] * pointEntry.second; + } + } + if (!cc.isOne(weightSum)) { + STORM_LOG_ERROR("Triangulation weights do not sum up to one."); + return false; + } + if (!assertBelief(triangulatedBelief)) { + STORM_LOG_ERROR("Triangulated belief is not a belief."); + } + if (!isEqual(belief, triangulatedBelief)) { + STORM_LOG_ERROR("Belief:\n\t" << toString(belief) << "\ndoes not match triangulated belief:\n\t" << toString(triangulatedBelief) << "."); + return false; + } + return true; + } + + uint32_t getBeliefObservation(BeliefType belief) { + STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief."); + return pomdp.getObservation(belief.begin()->first); + } + + struct FreudenthalDiff { + FreudenthalDiff(StateType const& dimension, BeliefValueType&& diff) : dimension(dimension), diff(std::move(diff)) { }; + StateType dimension; // i + BeliefValueType diff; // d[i] + bool operator>(FreudenthalDiff const& other) const { + if (diff != other.diff) { + return diff > other.diff; + } else { + return dimension < other.dimension; + } + } + }; + + void triangulateBeliefFreudenthal(BeliefType const& belief, BeliefValueType const& resolution, Triangulation& result) { + STORM_LOG_ASSERT(resolution != 0, "Invalid resolution: 0"); + STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution"); + StateType numEntries = belief.size(); + // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) + // Probabilities will be triangulated to values in 0/N, 1/N, 2/N, ..., N/N + // Variable names are mostly based on the paper + // However, we speed this up a little by exploiting that belief states usually have sparse support (i.e. numEntries is much smaller than pomdp.getNumberOfStates()). + // Initialize diffs and the first row of the 'qs' matrix (aka v) + std::set> sorted_diffs; // d (and p?) in the paper + std::vector qsRow; // Row of the 'qs' matrix from the paper (initially corresponds to v + qsRow.reserve(numEntries); + std::vector toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices + toOriginalIndicesMap.reserve(numEntries); + BeliefValueType x = resolution; + for (auto const& entry : belief) { + qsRow.push_back(storm::utility::floor(x)); // v + sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v + toOriginalIndicesMap.push_back(entry.first); + x -= entry.second * resolution; + } + // Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler + qsRow.push_back(storm::utility::zero()); + + result.weights.reserve(numEntries); + result.gridPoints.reserve(numEntries); + auto currentSortedDiff = sorted_diffs.begin(); + auto previousSortedDiff = sorted_diffs.end(); + --previousSortedDiff; + for (StateType i = 0; i < numEntries; ++i) { + // Compute the weight for the grid points + BeliefValueType weight = previousSortedDiff->diff - currentSortedDiff->diff; + if (i == 0) { + // The first weight is a bit different + weight += storm::utility::one(); + } else { + // 'compute' the next row of the qs matrix + qsRow[previousSortedDiff->dimension] += storm::utility::one(); + } + if (!cc.isZero(weight)) { + result.weights.push_back(weight); + // Compute the grid point + BeliefType gridPoint; + for (StateType j = 0; j < numEntries; ++j) { + BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; + if (!cc.isZero(gridPointEntry)) { + gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / resolution; + } + } + result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); + } + previousSortedDiff = currentSortedDiff++; + } + } + + void triangulateBeliefDynamic(BeliefType const& belief, BeliefValueType const& resolution, Triangulation& result) { + // Find the best resolution for this belief, i.e., N such that the largest distance between one of the belief values to a value in {i/N | 0 ≤ i ≤ N} is minimal + STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution"); + BeliefValueType finalResolution = resolution; + uint64_t finalResolutionMisses = belief.size() + 1; + // We don't need to check resolutions that are smaller than the maximal resolution divided by 2 (as we already checked multiples of these) + for (BeliefValueType currResolution = resolution; currResolution > resolution / 2; --currResolution) { + uint64_t currResMisses = 0; + bool continueWithNextResolution = false; + for (auto const& belEntry : belief) { + BeliefValueType product = belEntry.second * currResolution; + if (!cc.isZero(product - storm::utility::round(product))) { + ++currResMisses; + if (currResMisses >= finalResolutionMisses) { + // This resolution is not better than a previous resolution + continueWithNextResolution = true; + break; + } + } + } + if (!continueWithNextResolution) { + STORM_LOG_ASSERT(currResMisses < finalResolutionMisses, "Distance for this resolution should not be larger than a previously checked one."); + finalResolution = currResolution; + finalResolutionMisses = currResMisses; + if (currResMisses == 0) { + break; + } + } + } + + STORM_LOG_TRACE("Picking resolution " << finalResolution << " for belief " << toString(belief)); + + // do standard freudenthal with the found resolution + triangulateBeliefFreudenthal(belief, finalResolution, result); + } + + Triangulation triangulateBelief(BeliefType const& belief, BeliefValueType const& resolution) { + STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); + Triangulation result; + // Quickly triangulate Dirac beliefs + if (belief.size() == 1u) { + result.weights.push_back(storm::utility::one()); + result.gridPoints.push_back(getOrAddBeliefId(belief)); + } else { + auto ceiledResolution = storm::utility::ceil(resolution); + switch (triangulationMode) { + case TriangulationMode::Static: + triangulateBeliefFreudenthal(belief, ceiledResolution, result); + break; + case TriangulationMode::Dynamic: + triangulateBeliefDynamic(belief, ceiledResolution, result); + break; + default: + STORM_LOG_ASSERT(false, "Invalid triangulation mode."); + } + } + STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation: " << toString(result)); + return result; + } + + std::vector> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { + std::vector> destinations; + + BeliefType belief = getBelief(beliefId); + + // Find the probability we go to each observation + BeliefType successorObs; // This is actually not a belief but has the same type + for (auto const& pointEntry : belief) { + uint64_t state = pointEntry.first; + for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { + if (!storm::utility::isZero(pomdpTransition.getValue())) { + auto obs = pomdp.getObservation(pomdpTransition.getColumn()); + addToDistribution(successorObs, obs, pointEntry.second * pomdpTransition.getValue()); + } + } + } + + // Now for each successor observation we find and potentially triangulate the successor belief + for (auto const& successor : successorObs) { + BeliefType successorBelief; + for (auto const& pointEntry : belief) { + uint64_t state = pointEntry.first; + for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { + if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) { + ValueType prob = pointEntry.second * pomdpTransition.getValue() / successor.second; + addToDistribution(successorBelief, pomdpTransition.getColumn(), prob); + } + } + } + STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief."); + + // Insert the destination. We know that destinations have to be disjoined since they have different observations + if (observationTriangulationResolutions) { + Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.get()[successor.first]); + for (size_t j = 0; j < triangulation.size(); ++j) { + // Here we additionally assume that triangulation.gridPoints does not contain the same point multiple times + destinations.emplace_back(triangulation.gridPoints[j], triangulation.weights[j] * successor.second); + } + } else { + destinations.emplace_back(getOrAddBeliefId(successorBelief), successor.second); + } + } + + return destinations; + + } + + BeliefId computeInitialBelief() { + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, + "POMDP contains more than one initial state"); + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, + "POMDP does not contain an initial state"); + BeliefType belief; + belief[*pomdp.getInitialStates().begin()] = storm::utility::one(); + + STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief."); + return getOrAddBeliefId(belief); + } + + BeliefId getOrAddBeliefId(BeliefType const& belief) { + uint32_t obs = getBeliefObservation(belief); + STORM_LOG_ASSERT(obs < beliefToIdMap.size(), "Belief has unknown observation."); + auto insertioRes = beliefToIdMap[obs].emplace(belief, beliefs.size()); + if (insertioRes.second) { + // There actually was an insertion, so add the new belief + beliefs.push_back(belief); + } + // Return the id + return insertioRes.first->second; + } + + struct BeliefHash { + std::size_t operator()(const BeliefType& belief) const { + std::size_t seed = 0; + // Assumes that beliefs are ordered + for (auto const& entry : belief) { + boost::hash_combine(seed, entry.first); + boost::hash_combine(seed, entry.second); + } + return seed; + } + }; + + PomdpType const& pomdp; + std::vector pomdpActionRewardVector; + + std::vector beliefs; + std::vector> beliefToIdMap; + BeliefId initialBeliefId; + + storm::utility::ConstantsComparator cc; + + TriangulationMode triangulationMode; + + }; + } +} \ No newline at end of file diff --git a/src/storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.cpp b/src/storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.cpp index 1757598ce..22bf11cfe 100644 --- a/src/storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.cpp +++ b/src/storm-pomdp/transformer/ApplyFiniteSchedulerToPomdp.cpp @@ -128,5 +128,8 @@ namespace storm { } template class ApplyFiniteSchedulerToPomdp; + + template + class ApplyFiniteSchedulerToPomdp; } } \ No newline at end of file diff --git a/src/storm-pomdp/transformer/BinaryPomdpTransformer.cpp b/src/storm-pomdp/transformer/BinaryPomdpTransformer.cpp index f5ad38446..dd94aa013 100644 --- a/src/storm-pomdp/transformer/BinaryPomdpTransformer.cpp +++ b/src/storm-pomdp/transformer/BinaryPomdpTransformer.cpp @@ -162,5 +162,7 @@ namespace storm { template class BinaryPomdpTransformer; + template + class BinaryPomdpTransformer; } } \ No newline at end of file diff --git a/src/storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.cpp b/src/storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.cpp index c61ce5b0f..54ad9631c 100644 --- a/src/storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.cpp +++ b/src/storm-pomdp/transformer/GlobalPOMDPSelfLoopEliminator.cpp @@ -77,5 +77,8 @@ namespace storm { } template class GlobalPOMDPSelfLoopEliminator; + + template + class GlobalPOMDPSelfLoopEliminator; } } diff --git a/src/storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.cpp b/src/storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.cpp index d01342a7c..853b70952 100644 --- a/src/storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.cpp +++ b/src/storm-pomdp/transformer/GlobalPomdpMecChoiceEliminator.cpp @@ -225,5 +225,8 @@ namespace storm { template class GlobalPomdpMecChoiceEliminator; + + template + class GlobalPomdpMecChoiceEliminator; } } \ No newline at end of file diff --git a/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp b/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp new file mode 100644 index 000000000..e764b69ce --- /dev/null +++ b/src/storm-pomdp/transformer/KnownProbabilityTransformer.cpp @@ -0,0 +1,123 @@ +#include "KnownProbabilityTransformer.h" + +namespace storm { + namespace pomdp { + namespace transformer { + + template + KnownProbabilityTransformer::KnownProbabilityTransformer() { + // Intentionally left empty + } + + template + std::shared_ptr> + KnownProbabilityTransformer::transform(storm::models::sparse::Pomdp const &pomdp, storm::storage::BitVector &prob0States, + storm::storage::BitVector &prob1States) { + std::map stateMap; + std::map observationMap; + + uint64_t nrNewStates = prob0States.empty() ? 1 : 2; + + storm::models::sparse::StateLabeling newLabeling(pomdp.getNumberOfStates() - prob0States.getNumberOfSetBits() - prob1States.getNumberOfSetBits() + nrNewStates); + + std::vector newObservations; + + // New state 0 represents all states with probability 1 + for (auto const &iter : prob1States) { + stateMap[iter] = 0; + + std::set labelSet = pomdp.getStateLabeling().getLabelsOfState(iter); + for (auto const &label : labelSet) { + if (!newLabeling.containsLabel(label)) { + newLabeling.addLabel(label); + } + newLabeling.addLabelToState(label, 0); + } + } + // New state 1 represents all states with probability 0 + for (auto const &iter : prob0States) { + stateMap[iter] = 1; + for (auto const &label : pomdp.getStateLabeling().getLabelsOfState(iter)) { + if (!newLabeling.containsLabel(label)) { + newLabeling.addLabel(label); + } + newLabeling.addLabelToState(label, 1); + } + } + + storm::storage::BitVector unknownStates = ~(prob1States | prob0States); + //If there are no states with probability 0 we set the next new state id to be 1, otherwise 2 + uint64_t newId = prob0States.empty() ? 1 : 2; + uint64_t nextObservation = prob0States.empty() ? 1 : 2; + for (auto const &iter : unknownStates) { + stateMap[iter] = newId; + if (observationMap.count(pomdp.getObservation(iter)) == 0) { + observationMap[pomdp.getObservation(iter)] = nextObservation; + ++nextObservation; + } + for (auto const &label : pomdp.getStateLabeling().getLabelsOfState(iter)) { + if (!newLabeling.containsLabel(label)) { + newLabeling.addLabel(label); + } + newLabeling.addLabelToState(label, newId); + } + ++newId; + } + + uint64_t newNrOfStates = pomdp.getNumberOfStates() - (prob1States.getNumberOfSetBits() + prob0States.getNumberOfSetBits()); + + uint64_t currentRow = 0; + uint64_t currentRowGroup = 0; + storm::storage::SparseMatrixBuilder smb(0, 0, 0, false, true); + //new row for prob 1 state + smb.newRowGroup(currentRow); + smb.addNextValue(currentRow, 0, storm::utility::one()); + newObservations.push_back(0); + ++currentRowGroup; + ++currentRow; + if (!prob0States.empty()) { + smb.newRowGroup(currentRow); + smb.addNextValue(currentRow, 1, storm::utility::one()); + ++currentRowGroup; + ++currentRow; + newObservations.push_back(1); + } + + auto transitionMatrix = pomdp.getTransitionMatrix(); + + for (auto const &iter : unknownStates) { + smb.newRowGroup(currentRow); + // First collect all transitions + //auto rowGroup = transitionMatrix.getRowGroup(iter); + for (uint64_t row = 0; row < transitionMatrix.getRowGroupSize(iter); ++row) { + std::map transitionsInAction; + for (auto const &entry : transitionMatrix.getRow(iter, row)) { + // here we use the state mapping to collect all probabilities to get to a state with prob 0/1 + transitionsInAction[stateMap[entry.getColumn()]] += entry.getValue(); + } + for (auto const &transition : transitionsInAction) { + smb.addNextValue(currentRow, transition.first, transition.second); + } + ++currentRow; + } + ++currentRowGroup; + newObservations.push_back(observationMap[pomdp.getObservation(iter)]); + } + + auto newTransitionMatrix = smb.build(currentRow, newNrOfStates, currentRowGroup); + //STORM_PRINT(newTransitionMatrix) + storm::storage::sparse::ModelComponents components(newTransitionMatrix, newLabeling); + components.observabilityClasses = newObservations; + + auto newPomdp = storm::models::sparse::Pomdp(components); + + newPomdp.printModelInformationToStream(std::cout); + + return std::make_shared>(newPomdp); + } + + template class KnownProbabilityTransformer; + template class KnownProbabilityTransformer; + } + } +} \ No newline at end of file diff --git a/src/storm-pomdp/transformer/KnownProbabilityTransformer.h b/src/storm-pomdp/transformer/KnownProbabilityTransformer.h new file mode 100644 index 000000000..e043c3f99 --- /dev/null +++ b/src/storm-pomdp/transformer/KnownProbabilityTransformer.h @@ -0,0 +1,17 @@ +#include "storm/api/storm.h" +#include "storm/models/sparse/Pomdp.h" + +namespace storm { + namespace pomdp { + namespace transformer { + template + class KnownProbabilityTransformer { + public: + KnownProbabilityTransformer(); + + std::shared_ptr> + transform(storm::models::sparse::Pomdp const &pomdp, storm::storage::BitVector &prob0States, storm::storage::BitVector &prob1States); + }; + } + } +} diff --git a/src/storm-pomdp/transformer/MakePOMDPCanonic.cpp b/src/storm-pomdp/transformer/MakePOMDPCanonic.cpp index d4791ec45..19f5af156 100644 --- a/src/storm-pomdp/transformer/MakePOMDPCanonic.cpp +++ b/src/storm-pomdp/transformer/MakePOMDPCanonic.cpp @@ -95,7 +95,13 @@ namespace storm { void actionIdentifiersToStream(std::ostream& stream, std::vector const& actionIdentifiers, ChoiceLabelIdStorage const& labelStorage) { stream << "actions: {"; + bool first = true; for (auto ai : actionIdentifiers) { + if (first) { + first = false; + } else { + stream << " "; + } stream << "[" << ai.choiceLabelId << " (" << labelStorage.getLabel(ai.choiceLabelId) << ")"; stream << ", " << ai.choiceOriginId << "]"; } @@ -105,8 +111,14 @@ namespace storm { template void actionIdentifiersToStream(std::ostream& stream, std::map const& actionIdentifiers, ChoiceLabelIdStorage const& labelStorage) { stream << "actions: {"; + bool first = true; for (auto ai : actionIdentifiers) { - stream << "[" << ai.first.choiceLabelId << "('" << labelStorage.getLabel(ai.first.choiceLabelId) << "')"; + if (first) { + first = false; + } else { + stream << " "; + } + stream << "[" << ai.first.choiceLabelId << " (" << labelStorage.getLabel(ai.first.choiceLabelId) << ")"; stream << ", " << ai.first.choiceOriginId << "]"; } stream << "}"; @@ -144,7 +156,7 @@ namespace storm { template std::string MakePOMDPCanonic::getStateInformation(uint64_t state) const { if(pomdp.hasStateValuations()) { - return std::to_string(state) + "[" + pomdp.getStateValuations().getStateInfo(state) + "]"; + return std::to_string(state) + " " + pomdp.getStateValuations().getStateInfo(state); } else { return std::to_string(state); } @@ -242,7 +254,7 @@ namespace storm { detail::actionIdentifiersToStream(std::cout, actionIdentifiers, labelStorage); std::cout << " according to state " << state << "." << std::endl; - STORM_LOG_THROW(false, storm::exceptions::AmbiguousModelException, "Actions identifiers do not align between states '" << getStateInformation(state) << "' and '" << getStateInformation(actionIdentifierDefinition[observation]) << "', both having observation " << observation << ". See output above for more information."); + STORM_LOG_THROW(false, storm::exceptions::AmbiguousModelException, "Actions identifiers do not align between states \n\t" << getStateInformation(state) << "\nand\n\t" << getStateInformation(actionIdentifierDefinition[observation]) << "\nboth having observation " << observation << ". See output above for more information."); } } diff --git a/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp b/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp index 827493efc..c23c828ab 100644 --- a/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp +++ b/src/storm-pomdp/transformer/PomdpMemoryUnfolder.cpp @@ -185,5 +185,8 @@ namespace storm { } template class PomdpMemoryUnfolder; + + template + class PomdpMemoryUnfolder; } } \ No newline at end of file diff --git a/src/storm/adapters/MathsatExpressionAdapter.h b/src/storm/adapters/MathsatExpressionAdapter.h index 88ed84dcd..ca6de5d8c 100644 --- a/src/storm/adapters/MathsatExpressionAdapter.h +++ b/src/storm/adapters/MathsatExpressionAdapter.h @@ -106,6 +106,10 @@ namespace storm { STORM_LOG_ASSERT(declarationVariablePair != declarationToVariableMapping.end(), "Unknown variable declaration."); return declarationVariablePair->second; } + + std::unordered_map const& getAllDeclaredVariables() const { + return variableToDeclarationMapping; + } virtual boost::any visit(storm::expressions::BinaryBooleanFunctionExpression const& expression, boost::any const& data) override { msat_term leftResult = boost::any_cast(expression.getFirstOperand()->accept(*this, data)); diff --git a/src/storm/adapters/Z3ExpressionAdapter.cpp b/src/storm/adapters/Z3ExpressionAdapter.cpp index 34dea0fa3..b84c9ba08 100644 --- a/src/storm/adapters/Z3ExpressionAdapter.cpp +++ b/src/storm/adapters/Z3ExpressionAdapter.cpp @@ -37,8 +37,7 @@ namespace storm { result = result && assertion; } additionalAssertions.clear(); - - return result; + return result.simplify(); } z3::expr Z3ExpressionAdapter::translateExpression(storm::expressions::Variable const& variable) { diff --git a/src/storm/models/sparse/Model.cpp b/src/storm/models/sparse/Model.cpp index 1fe8c980e..bd62c17f5 100644 --- a/src/storm/models/sparse/Model.cpp +++ b/src/storm/models/sparse/Model.cpp @@ -363,7 +363,9 @@ namespace storm { storm::utility::outputFixedWidth(outStream, this->getLabelsOfState(state), maxWidthLabel); outStream << "}"; } - + + outStream << this->additionalDotStateInfo(state); + // If we are to include some values for the state as well, we do so now. if (firstValue != nullptr || secondValue != nullptr) { outStream << " ["; @@ -397,7 +399,12 @@ namespace storm { outStream << "}" << std::endl; } } - + + template + std::string Model::additionalDotStateInfo(uint64_t state) const { + return ""; + } + template std::set Model::getLabelsOfState(storm::storage::sparse::state_type state) const { return this->stateLabeling.getLabelsOfState(state); diff --git a/src/storm/models/sparse/Model.h b/src/storm/models/sparse/Model.h index 4b8616f8a..c86a5924f 100644 --- a/src/storm/models/sparse/Model.h +++ b/src/storm/models/sparse/Model.h @@ -335,8 +335,8 @@ namespace storm { * @param finalizeOutput A flag that sets whether or not the dot stream is closed with a curly brace. * @return A string containing the exported model in dot-format. */ - virtual void writeDotToStream(std::ostream& outStream, size_t maxWidthLabel = 30, bool includeLabeling = true, storm::storage::BitVector const* subsystem = nullptr, std::vector const* firstValue = nullptr, std::vector const* secondValue = nullptr, std::vector const* stateColoring = nullptr, std::vector const* colors = nullptr, std::vector* scheduler = nullptr, bool finalizeOutput = true) const; - + virtual void writeDotToStream(std::ostream& outStream, size_t maxWidthLabel = 30, bool includeLabeling = true, storm::storage::BitVector const* subsystem = nullptr, std::vector const* firstValue = nullptr, std::vector const* secondValue = nullptr, std::vector const* stateColoring = nullptr, std::vector const* colors = nullptr, std::vector* scheduler = nullptr, bool finalizeOutput = true) const; + /*! * Retrieves the set of labels attached to the given state. * @@ -396,6 +396,13 @@ namespace storm { * @param out The stream the information is to be printed to. */ void printRewardModelsInformationToStream(std::ostream& out) const; + + /*! + * Return a string that is additonally added to the state information in the dot stream. + * @param state + * @return + */ + virtual std::string additionalDotStateInfo(uint64_t state) const; private: diff --git a/src/storm/models/sparse/Pomdp.cpp b/src/storm/models/sparse/Pomdp.cpp index 2ba70383b..135b74f48 100644 --- a/src/storm/models/sparse/Pomdp.cpp +++ b/src/storm/models/sparse/Pomdp.cpp @@ -54,17 +54,50 @@ namespace storm { return nrObservations; } + template + uint64_t Pomdp::getMaxNrStatesWithSameObservation() const { + std::map counts; + for (auto const& obs : observations) { + auto insertionRes = counts.emplace(obs, 1ull); + if (!insertionRes.second) { + ++insertionRes.first->second; + } + } + uint64_t result = 0; + for (auto const& count : counts) { + result = std::max(result, count.second); + } + return result; + } + template std::vector const& Pomdp::getObservations() const { return observations; } + template - bool Pomdp::isCanonic() const { - return canonicFlag; + std::string Pomdp::additionalDotStateInfo(uint64_t state) const { + return "<" + std::to_string(getObservation(state)) + ">"; } + template + std::vector + Pomdp::getStatesWithObservation(uint32_t observation) const { + std::vector result; + for (uint64_t state = 0; state < this->getNumberOfStates(); ++state) { + if (this->getObservation(state) == observation) { + result.push_back(state); + } + } + return result; + } + + template + bool Pomdp::isCanonic() const { + return canonicFlag; + } diff --git a/src/storm/models/sparse/Pomdp.h b/src/storm/models/sparse/Pomdp.h index b50f41fa2..b09f87886 100644 --- a/src/storm/models/sparse/Pomdp.h +++ b/src/storm/models/sparse/Pomdp.h @@ -59,13 +59,25 @@ namespace storm { uint64_t getNrObservations() const; + /*! + * Returns the number of hidden values, i.e. the maximum number of states with the same observation + */ + uint64_t getMaxNrStatesWithSameObservation() const; + std::vector const& getObservations() const; + std::vector getStatesWithObservation(uint32_t observation) const; + bool isCanonic() const; - - protected: + /*! + * Return a string that is additonally added to the state information in the dot stream. + * @param state + * @return + */ + virtual std::string additionalDotStateInfo(uint64_t state) const override; + // TODO: consider a bitvector based presentation (depending on our needs). std::vector observations; diff --git a/src/storm/settings/SettingsManager.cpp b/src/storm/settings/SettingsManager.cpp index da236433c..027a24df6 100644 --- a/src/storm/settings/SettingsManager.cpp +++ b/src/storm/settings/SettingsManager.cpp @@ -11,7 +11,6 @@ #include "storm/exceptions/IllegalFunctionCallException.h" #include "storm/exceptions/OptionParserException.h" -#include "storm/utility/storm-version.h" #include "storm/settings/modules/GeneralSettings.h" #include "storm/settings/modules/CoreSettings.h" #include "storm/settings/modules/IOSettings.h" diff --git a/src/storm/solver/MathsatSmtSolver.cpp b/src/storm/solver/MathsatSmtSolver.cpp index 219efe530..f8c1d6f86 100644 --- a/src/storm/solver/MathsatSmtSolver.cpp +++ b/src/storm/solver/MathsatSmtSolver.cpp @@ -32,6 +32,22 @@ namespace storm { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unable to retrieve double value from model that only contains boolean values."); } + std::string MathsatSmtSolver::MathsatAllsatModelReference::toString() const { + std::stringstream str; + bool first = true; + str << "["; + for (auto const& varSlot : variableToSlotMapping) { + if (first) { + first = false; + } else { + str << ", "; + } + str << varSlot.first.getName() << "=" << std::boolalpha << getBooleanValue(varSlot.first); + } + str << "]"; + return str.str(); + } + MathsatSmtSolver::MathsatModelReference::MathsatModelReference(storm::expressions::ExpressionManager const& manager, msat_env const& env, storm::adapters::MathsatExpressionAdapter& expressionAdapter) : ModelReference(manager), env(env), expressionAdapter(expressionAdapter) { // Intentionally left empty. } @@ -62,6 +78,25 @@ namespace storm { storm::expressions::Expression value = expressionAdapter.translateExpression(msatValue); return value.evaluateAsDouble(); } + + std::string MathsatSmtSolver::MathsatModelReference::toString() const { + std::stringstream str; + bool first = true; + str << "["; + for (auto const& varDecl : expressionAdapter.getAllDeclaredVariables()) { + if (first) { + first = false; + } else { + str << ", "; + } + msat_term msatValue = msat_get_model_value(env, expressionAdapter.translateExpression(varDecl.first)); + STORM_LOG_ASSERT(!MSAT_ERROR_TERM(msatValue), "Unable to retrieve value of variable in model. This could be caused by calls to the solver between checking for satisfiability and model retrieval."); + str << varDecl.first.getName() << "=" << expressionAdapter.translateExpression(msatValue); + } + str << "]"; + return str.str(); + } + #endif MathsatSmtSolver::MathsatSmtSolver(storm::expressions::ExpressionManager& manager, Options const& options) : SmtSolver(manager) diff --git a/src/storm/solver/MathsatSmtSolver.h b/src/storm/solver/MathsatSmtSolver.h index 0d27c71af..851c00d82 100644 --- a/src/storm/solver/MathsatSmtSolver.h +++ b/src/storm/solver/MathsatSmtSolver.h @@ -38,7 +38,8 @@ namespace storm { virtual bool getBooleanValue(storm::expressions::Variable const& variable) const override; virtual int_fast64_t getIntegerValue(storm::expressions::Variable const& variable) const override; virtual double getRationalValue(storm::expressions::Variable const& variable) const override; - + virtual std::string toString() const override; + private: msat_env const& env; msat_term* model; @@ -54,7 +55,8 @@ namespace storm { virtual bool getBooleanValue(storm::expressions::Variable const& variable) const override; virtual int_fast64_t getIntegerValue(storm::expressions::Variable const& variable) const override; virtual double getRationalValue(storm::expressions::Variable const& variable) const override; - + virtual std::string toString() const override; + private: msat_env const& env; storm::adapters::MathsatExpressionAdapter& expressionAdapter; diff --git a/src/storm/solver/SmtSolver.h b/src/storm/solver/SmtSolver.h index 7ee3896bd..4077c244a 100644 --- a/src/storm/solver/SmtSolver.h +++ b/src/storm/solver/SmtSolver.h @@ -23,6 +23,8 @@ namespace storm { public: //! possible check results enum class CheckResult { Sat, Unsat, Unknown }; + + /*! * The base class for all model references. They are used to provide a lightweight method of accessing the @@ -48,6 +50,8 @@ namespace storm { * @return The expression manager associated with this model reference. */ storm::expressions::ExpressionManager const& getManager() const; + + virtual std::string toString() const = 0; private: // The expression manager responsible for the variables whose value can be requested via this model diff --git a/src/storm/solver/SmtlibSmtSolver.cpp b/src/storm/solver/SmtlibSmtSolver.cpp index aad46d81e..1ef7fda76 100644 --- a/src/storm/solver/SmtlibSmtSolver.cpp +++ b/src/storm/solver/SmtlibSmtSolver.cpp @@ -40,6 +40,11 @@ namespace storm { STORM_LOG_THROW(false, storm::exceptions::NotImplementedException, "functionality not (yet) implemented"); } + std::string SmtlibSmtSolver::SmtlibModelReference::toString() const { + STORM_LOG_THROW(false, storm::exceptions::NotImplementedException, "functionality not (yet) implemented"); + } + + SmtlibSmtSolver::SmtlibSmtSolver(storm::expressions::ExpressionManager& manager, bool useCarlExpressions) : SmtSolver(manager), isCommandFileOpen(false), expressionAdapter(nullptr), useCarlExpressions(useCarlExpressions) { #ifndef STORM_HAVE_CARL STORM_LOG_THROW(!useCarlExpressions, storm::exceptions::IllegalArgumentException, "Tried to use carl expressions but storm is not linked with CARL"); diff --git a/src/storm/solver/SmtlibSmtSolver.h b/src/storm/solver/SmtlibSmtSolver.h index 6ae64eb92..cd31e58d5 100644 --- a/src/storm/solver/SmtlibSmtSolver.h +++ b/src/storm/solver/SmtlibSmtSolver.h @@ -26,7 +26,7 @@ namespace storm { virtual bool getBooleanValue(storm::expressions::Variable const& variable) const override; virtual int_fast64_t getIntegerValue(storm::expressions::Variable const& variable) const override; virtual double getRationalValue(storm::expressions::Variable const& variable) const override; - + virtual std::string toString() const override; }; public: diff --git a/src/storm/solver/Z3SmtSolver.cpp b/src/storm/solver/Z3SmtSolver.cpp index 13579a172..7d3745fa7 100644 --- a/src/storm/solver/Z3SmtSolver.cpp +++ b/src/storm/solver/Z3SmtSolver.cpp @@ -44,7 +44,18 @@ namespace storm { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Storm is compiled without Z3 support."); #endif } - + + std::string Z3SmtSolver::Z3ModelReference::toString() const { +#ifdef STORM_HAVE_Z3 + std::stringstream sstr; + sstr << model; + return sstr.str(); +#else + STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Storm is compiled without Z3 support."); +#endif + } + + Z3SmtSolver::Z3SmtSolver(storm::expressions::ExpressionManager& manager) : SmtSolver(manager) #ifdef STORM_HAVE_Z3 , context(nullptr), solver(nullptr), expressionAdapter(nullptr), lastCheckAssumptions(false), lastResult(CheckResult::Unknown) diff --git a/src/storm/solver/Z3SmtSolver.h b/src/storm/solver/Z3SmtSolver.h index 6616e0292..fe92299a7 100644 --- a/src/storm/solver/Z3SmtSolver.h +++ b/src/storm/solver/Z3SmtSolver.h @@ -22,6 +22,7 @@ namespace storm { virtual bool getBooleanValue(storm::expressions::Variable const& variable) const override; virtual int_fast64_t getIntegerValue(storm::expressions::Variable const& variable) const override; virtual double getRationalValue(storm::expressions::Variable const& variable) const override; + virtual std::string toString() const override; private: #ifdef STORM_HAVE_Z3 diff --git a/src/storm/storage/Distribution.cpp b/src/storm/storage/Distribution.cpp index f40afb402..2290c611c 100644 --- a/src/storm/storage/Distribution.cpp +++ b/src/storm/storage/Distribution.cpp @@ -166,7 +166,17 @@ namespace storm { } } - + template + void Distribution::normalize() { + ValueType sum = storm::utility::zero(); + for (auto const& entry: distribution) { + sum += entry.second; + } + for (auto& entry: distribution) { + entry.second /= sum; + } + } + template class Distribution; template std::ostream& operator<<(std::ostream& out, Distribution const& distribution); diff --git a/src/storm/storage/Distribution.h b/src/storm/storage/Distribution.h index d7e0bd2fb..c3ac58dcc 100644 --- a/src/storm/storage/Distribution.h +++ b/src/storm/storage/Distribution.h @@ -144,6 +144,11 @@ namespace storm { */ ValueType getProbability(StateType const& state) const; + /*! + * Normalizes the distribution such that the values sum up to one. + */ + void normalize(); + private: // A list of states and the probabilities that are assigned to them. container_type distribution; diff --git a/src/storm/utility/vector.h b/src/storm/utility/vector.h index e5fda444e..af6098038 100644 --- a/src/storm/utility/vector.h +++ b/src/storm/utility/vector.h @@ -5,8 +5,8 @@ #include #include #include -#include -#include +#include "storm/adapters/RationalFunctionAdapter.h" +#include "storm/adapters/IntelTbbAdapter.h" #include @@ -142,6 +142,12 @@ namespace storm { return true; } + template + bool compareElementWise(std::vector const& left, std::vector const& right, Comparator comp = std::less()) { + STORM_LOG_ASSERT(left.size() == right.size(), "Expected that vectors for comparison have equal size"); + return std::equal(left.begin(), left.end(), right.begin(), comp); + } + /*! * Selects the elements from a vector at the specified positions and writes them consecutively into another vector. * @param vector The vector into which the selected elements are to be written.