diff --git a/resources/3rdparty/CMakeLists.txt b/resources/3rdparty/CMakeLists.txt index 46e927d85..2a5f13dab 100644 --- a/resources/3rdparty/CMakeLists.txt +++ b/resources/3rdparty/CMakeLists.txt @@ -170,8 +170,8 @@ if(Z3_FOUND) set(STORM_Z3_API_USES_STANDARD_INTEGERS ON) endif() - add_imported_library(z3 SHARED ${Z3_LIBRARIES} ${Z3_INCLUDE_DIRS}) - list(APPEND STORM_DEP_TARGETS z3_SHARED) + add_imported_library(Z3 SHARED ${Z3_LIBRARIES} ${Z3_INCLUDE_DIRS}) + list(APPEND STORM_DEP_TARGETS Z3_SHARED) else() message(WARNING "Storm - Could not obtain Z3 version. Building of Prism/JANI models will not be supported.") set(Z3_FOUND FALSE) @@ -197,13 +197,13 @@ include(${STORM_3RDPARTY_SOURCE_DIR}/include_glpk.cmake) ############################################################# if (STORM_USE_GUROBI) - find_package(Gurobi QUIET REQUIRED) + find_package(GUROBI QUIET REQUIRED) set(STORM_HAVE_GUROBI ${GUROBI_FOUND}) if (GUROBI_FOUND) if (EXISTS ${GUROBI_LIBRARY}) message (STATUS "Storm - Linking with Gurobi (${GUROBI_CXX_LIBRARY}).") - add_imported_library(Gurobi SHARED ${GUROBI_LIBRARY} ${GUROBI_INCLUDE_DIRS}) - list(APPEND STORM_DEP_TARGETS Gurobi_SHARED) + add_imported_library(GUROBI SHARED ${GUROBI_LIBRARY} ${GUROBI_INCLUDE_DIRS}) + list(APPEND STORM_DEP_TARGETS GUROBI_SHARED) else() # The FindGurobi.cmake script needs to be updated every now and then as the library file contains the version number... message(FATAL_ERROR "Gurobi Library ${GUROBI_LIBRARY} not found. If your Gurobi version is higher than 9.0.0, please contact the Storm developers.") @@ -496,11 +496,11 @@ add_dependencies(sylvan_STATIC sylvan) list(APPEND STORM_DEP_TARGETS sylvan_STATIC) -find_package(Hwloc QUIET REQUIRED) +find_package(HWLOC QUIET REQUIRED) if(HWLOC_FOUND) message(STATUS "Storm - Linking with hwloc ${HWLOC_VERSION}.") - add_imported_library(hwloc STATIC ${HWLOC_LIBRARIES} "") - list(APPEND STORM_DEP_TARGETS hwloc_STATIC) + add_imported_library(HWLOC STATIC ${HWLOC_LIBRARIES} "") + list(APPEND STORM_DEP_TARGETS HWLOC_STATIC) else() if(${OPERATING_SYSTEM} MATCHES "Linux") message(FATAL_ERROR "HWLOC is required on Linux but was not found.") diff --git a/resources/cmake/find_modules/FindGurobi.cmake b/resources/cmake/find_modules/FindGUROBI.cmake similarity index 100% rename from resources/cmake/find_modules/FindGurobi.cmake rename to resources/cmake/find_modules/FindGUROBI.cmake diff --git a/resources/cmake/find_modules/FindHwloc.cmake b/resources/cmake/find_modules/FindHWLOC.cmake similarity index 100% rename from resources/cmake/find_modules/FindHwloc.cmake rename to resources/cmake/find_modules/FindHWLOC.cmake diff --git a/resources/cmake/find_modules/FindZ3.cmake b/resources/cmake/find_modules/FindZ3.cmake index d0978b3bc..6c6a7beb1 100644 --- a/resources/cmake/find_modules/FindZ3.cmake +++ b/resources/cmake/find_modules/FindZ3.cmake @@ -32,7 +32,7 @@ set(Z3_SOLVER ${Z3_EXEC}) # set the LIBZ3_FOUND variable by utilizing the following macro # (which also handles the REQUIRED and QUIET arguments) include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(z3 DEFAULT_MSG +find_package_handle_standard_args(Z3 DEFAULT_MSG Z3_LIBRARY Z3_INCLUDE_DIR) IF (NOT Z3_FIND_QUIETLY) diff --git a/src/storm-cli-utilities/model-handling.h b/src/storm-cli-utilities/model-handling.h index 0e4fe4812..e6508c190 100644 --- a/src/storm-cli-utilities/model-handling.h +++ b/src/storm-cli-utilities/model-handling.h @@ -276,6 +276,7 @@ namespace storm { case ModelProcessingInformation::ValueType::FinitePrecision: return
storm::utility::canHandle(mpi.engine, input.preprocessedProperties.is_initialized() ? input.preprocessedProperties.get() : input.properties, input.model.get()); } + return false; }; mpi.isCompatible = checkCompatibleSettings(); if (!mpi.isCompatible) { @@ -565,6 +566,10 @@ namespace storm { if (ioSettings.isExportExplicitSet()) { storm::api::exportSparseModelAsDrn(model, ioSettings.getExportExplicitFilename(), input.model ? input.model.get().getParameterNames() : std::vector()); } + + if (ioSettings.isExportDdSet()) { + STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Exporting in drdd format is only supported for DDs."); + } if (ioSettings.isExportDotSet()) { storm::api::exportSparseModelAsDot(model, ioSettings.getExportDotFilename(), ioSettings.getExportDotMaxWidth()); @@ -575,6 +580,10 @@ namespace storm { void exportDdModel(std::shared_ptr> const& model, SymbolicInput const& input) { auto ioSettings = storm::settings::getModule(); + if (ioSettings.isExportExplicitSet()) { + STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Exporting in drn format is only supported for sparse models."); + } + if (ioSettings.isExportDdSet()) { storm::api::exportSparseModelAsDrdd(model, ioSettings.getExportDdFilename()); } diff --git a/src/storm-gspn/storage/gspn/GspnBuilder.cpp b/src/storm-gspn/storage/gspn/GspnBuilder.cpp index c39b5c0f7..41ef64f96 100644 --- a/src/storm-gspn/storage/gspn/GspnBuilder.cpp +++ b/src/storm-gspn/storage/gspn/GspnBuilder.cpp @@ -13,7 +13,7 @@ namespace storm { gspnName = name; } - uint_fast64_t GspnBuilder::addPlace(boost::optional capacity, uint_fast64_t const& initialTokens, std::string const& name) { + uint_fast64_t GspnBuilder::addPlace(boost::optional const& capacity, uint_fast64_t const& initialTokens, std::string const& name) { auto newId = places.size(); auto place = storm::gspn::Place(newId); place.setCapacity(capacity); @@ -70,7 +70,7 @@ namespace storm { return addTimedTransition(priority, rate, 1, name); } - uint_fast64_t GspnBuilder::addTimedTransition(uint_fast64_t const &priority, double const &rate, boost::optional numServers, std::string const& name) { + uint_fast64_t GspnBuilder::addTimedTransition(uint_fast64_t const &priority, double const &rate, boost::optional const& numServers, std::string const& name) { auto trans = storm::gspn::TimedTransition(); auto newId = GSPN::timedTransitionIdToTransitionId(timedTransitions.size()); trans.setName(name); diff --git a/src/storm-gspn/storage/gspn/GspnBuilder.h b/src/storm-gspn/storage/gspn/GspnBuilder.h index 5e4db4a34..fb4cd2ad7 100644 --- a/src/storm-gspn/storage/gspn/GspnBuilder.h +++ b/src/storm-gspn/storage/gspn/GspnBuilder.h @@ -25,7 +25,7 @@ namespace storm { * A capacity of -1 indicates an unbounded place. * @param initialTokens The number of initial tokens in the place. */ - uint_fast64_t addPlace(boost::optional capacity = 1, uint_fast64_t const& initialTokens = 0, std::string const& name = ""); + uint_fast64_t addPlace(boost::optional const& capacity = 1, uint_fast64_t const& initialTokens = 0, std::string const& name = ""); void setPlaceLayoutInfo(uint64_t placeId, LayoutInfo const& layoutInfo); @@ -51,7 +51,7 @@ namespace storm { * @param rate The rate for the transition. * @param numServers The number of servers this transition has (in case of K-Server semantics) or boost::none (in case of Infinite-Server-Semantics).
*/ - uint_fast64_t addTimedTransition(uint_fast64_t const &priority, RateType const& rate, boost::optional numServers, std::string const& name = ""); + uint_fast64_t addTimedTransition(uint_fast64_t const &priority, RateType const& rate, boost::optional const& numServers, std::string const& name = ""); void setTransitionLayoutInfo(uint64_t transitionId, LayoutInfo const& layoutInfo); diff --git a/src/storm-gspn/storage/gspn/Place.cpp b/src/storm-gspn/storage/gspn/Place.cpp index ca9a4a9e1..8d7347936 100644 --- a/src/storm-gspn/storage/gspn/Place.cpp +++ b/src/storm-gspn/storage/gspn/Place.cpp @@ -29,7 +29,7 @@ namespace storm { return this->numberOfInitialTokens; } - void Place::setCapacity(boost::optional cap) { + void Place::setCapacity(boost::optional const& cap) { this->capacity = cap; } diff --git a/src/storm-gspn/storage/gspn/Place.h b/src/storm-gspn/storage/gspn/Place.h index 55c9da6fb..b6a8b4ca1 100644 --- a/src/storm-gspn/storage/gspn/Place.h +++ b/src/storm-gspn/storage/gspn/Place.h @@ -56,7 +56,7 @@ namespace storm { * @param capacity The capacity of this place. A non-negative number represents the capacity. * boost::none indicates that the flag is not set. */ - void setCapacity(boost::optional capacity); + void setCapacity(boost::optional const& capacity); /*! * Returns the capacity of tokens of this place. diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp index 8006b3851..6b5b17677 100644 --- a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp @@ -19,6 +19,7 @@ namespace storm { const std::string limitBeliefExplorationOption = "limit-exploration"; const std::string numericPrecisionOption = "numeric-precision"; const std::string cacheSimplicesOption = "cache-simplices"; + const std::string unfoldBeliefMdpOption = "unfold-belief-mdp"; GridApproximationSettings::GridApproximationSettings() : ModuleSettings(moduleName) { @@ -35,6 +36,7 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, cacheSimplicesOption, false,"Enables caching of simplices which requires more memory but can be faster.").build()); + this->addOption(storm::settings::OptionBuilder(moduleName, unfoldBeliefMdpOption, false,"Sets the (initial-) size threshold of the unfolded belief MDP (higher means more precise results, 0 means automatic choice)").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("value","the maximal number of states").setDefaultValueUnsignedInteger(0).build()).build()); } bool GridApproximationSettings::isRefineSet() const { @@ -65,6 +67,14 @@ namespace storm { return this->getOption(cacheSimplicesOption).getHasOptionBeenSet(); } + bool GridApproximationSettings::isUnfoldBeliefMdpSizeThresholdSet() const { + return this->getOption(unfoldBeliefMdpOption).getHasOptionBeenSet(); + } + + uint64_t GridApproximationSettings::getUnfoldBeliefMdpSizeThreshold() const { + return this->getOption(unfoldBeliefMdpOption).getArgumentByName("value").getValueAsUnsignedInteger(); + } + } // namespace modules } // namespace settings } // namespace storm diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h index a01fdbd77..88e484128 100644 --- a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h +++ b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h @@ -27,7 
+27,8 @@ namespace storm { bool isNumericPrecisionSetFromDefault() const; double getNumericPrecision() const; bool isCacheSimplicesSet() const; - + bool isUnfoldBeliefMdpSizeThresholdSet() const; + uint64_t getUnfoldBeliefMdpSizeThreshold() const; // The name of the module. static const std::string moduleName; diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index 16b516b5b..a668eb9af 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -13,6 +13,7 @@ namespace storm { namespace modules { const std::string POMDPSettings::moduleName = "pomdp"; + const std::string noCanonicOption = "nocanonic"; const std::string exportAsParametricModelOption = "parametric-drn"; const std::string gridApproximationOption = "gridapproximation"; const std::string qualitativeReductionOption = "qualitativereduction"; @@ -31,6 +32,7 @@ namespace storm { const std::string checkFullyObservableOption = "check-fully-observable"; POMDPSettings::POMDPSettings() : ModuleSettings(moduleName) { + this->addOption(storm::settings::OptionBuilder(moduleName, noCanonicOption, false, "If this is set, actions will not be ordered canonically. Could yield incorrect results.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, exportAsParametricModelOption, false, "Export the parametric file.").addArgument(storm::settings::ArgumentBuilder::createStringArgument("filename", "The name of the file to which to write the model.").build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, qualitativeReductionOption, false, "Reduces the model size by performing qualitative analysis (e.g. merge states with prob. 1).").build()); this->addOption(storm::settings::OptionBuilder(moduleName, analyzeUniqueObservationsOption, false, "Computes the states with a unique observation.").build()); @@ -47,6 +49,10 @@ namespace storm { } + bool POMDPSettings::isNoCanonicSet() const { + return this->getOption(noCanonicOption).getHasOptionBeenSet(); + } + bool POMDPSettings::isExportToParametricSet() const { return this->getOption(exportAsParametricModelOption).getHasOptionBeenSet(); } diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h index 768766536..6754ac55c 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h @@ -26,6 +26,7 @@ namespace storm { bool isQualitativeReductionSet() const; + bool isNoCanonicSet() const; bool isGridApproximationSet() const; bool isAnalyzeUniqueObservationsSet() const; bool isMecReductionSet() const; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index a62191829..f2feb619d 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -24,98 +24,7 @@ #include "storm-pomdp/analysis/FormulaInformation.h" #include "storm-pomdp/analysis/MemlessStrategySearchQualitative.h" #include "storm-pomdp/analysis/QualitativeStrategySearchNaive.h" -// -//template -//bool extractTargetAndSinkObservationSets(std::shared_ptr> const& pomdp, storm::logic::Formula const& subformula, std::set& targetObservationSet, storm::storage::BitVector& targetStates, storm::storage::BitVector& badStates) { -// //TODO refactor (use model checker to determine the states, then transform into observations). -// //TODO rename into appropriate function name.
-// bool validFormula = false; -// if (subformula.isEventuallyFormula()) { -// storm::logic::EventuallyFormula const &eventuallyFormula = subformula.asEventuallyFormula(); -// storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula(); -// if (subformula2.isAtomicLabelFormula()) { -// storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); -// validFormula = true; -// std::string targetLabel = alFormula.getLabel(); -// auto labeling = pomdp->getStateLabeling(); -// for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { -// if (labeling.getStateHasLabel(targetLabel, state)) { -// targetObservationSet.insert(pomdp->getObservation(state)); -// targetStates.set(state); -// } -// } -// } else if (subformula2.isAtomicExpressionFormula()) { -// validFormula = true; -// std::stringstream stream; -// stream << subformula2.asAtomicExpressionFormula().getExpression(); -// storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); -// std::string targetLabel = formula3.getLabel(); -// auto labeling = pomdp->getStateLabeling(); -// for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { -// if (labeling.getStateHasLabel(targetLabel, state)) { -// targetObservationSet.insert(pomdp->getObservation(state)); -// targetStates.set(state); -// } -// } -// } -// } else if (subformula.isUntilFormula()) { -// storm::logic::UntilFormula const &untilFormula = subformula.asUntilFormula(); -// storm::logic::Formula const &subformula1 = untilFormula.getLeftSubformula(); -// if (subformula1.isAtomicLabelFormula()) { -// storm::logic::AtomicLabelFormula const &alFormula = subformula1.asAtomicLabelFormula(); -// std::string targetLabel = alFormula.getLabel(); -// auto labeling = pomdp->getStateLabeling(); -// for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { -// if (!labeling.getStateHasLabel(targetLabel, state)) { -// badStates.set(state); -// } -// } -// } else if (subformula1.isAtomicExpressionFormula()) { -// std::stringstream stream; -// stream << subformula1.asAtomicExpressionFormula().getExpression(); -// storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); -// std::string targetLabel = formula3.getLabel(); -// auto labeling = pomdp->getStateLabeling(); -// for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { -// if (!labeling.getStateHasLabel(targetLabel, state)) { -// badStates.set(state); -// } -// } -// } else { -// return false; -// } -// storm::logic::Formula const &subformula2 = untilFormula.getRightSubformula(); -// if (subformula2.isAtomicLabelFormula()) { -// storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula(); -// validFormula = true; -// std::string targetLabel = alFormula.getLabel(); -// auto labeling = pomdp->getStateLabeling(); -// for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) { -// if (labeling.getStateHasLabel(targetLabel, state)) { -// targetObservationSet.insert(pomdp->getObservation(state)); -// targetStates.set(state); -// } -// -// } -// } else if (subformula2.isAtomicExpressionFormula()) { -// validFormula = true; -// std::stringstream stream; -// stream << subformula2.asAtomicExpressionFormula().getExpression(); -// storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str()); -// std::string targetLabel = formula3.getLabel(); -// auto labeling = pomdp->getStateLabeling(); -// for (size_t state = 0; state < 
pomdp->getNumberOfStates(); ++state) { -// if (labeling.getStateHasLabel(targetLabel, state)) { -// targetObservationSet.insert(pomdp->getObservation(state)); -// targetStates.set(state); -// } -// -// } -// } -// } -// return validFormula; -//} -// + template std::set extractObservations(storm::models::sparse::Pomdp const& pomdp, storm::storage::BitVector const& states) { // TODO move. @@ -125,85 +34,6 @@ std::set extractObservations(storm::models::sparse::Pomdp c } return observations; } -// -///*! -// * Entry point for the pomdp backend. -// * -// * @param argc The argc argument of main(). -// * @param argv The argv argument of main(). -// * @return Return code, 0 if successfull, not 0 otherwise. -// */ -//int main(const int argc, const char** argv) { -// //try { -// storm::utility::setUp(); -// storm::cli::printHeader("Storm-pomdp", argc, argv); -// initializeSettings(); -// -// bool optionsCorrect = storm::cli::parseOptions(argc, argv); -// if (!optionsCorrect) { -// return -1; -// } -// storm::cli::setUrgentOptions(); -// -// auto const& coreSettings = storm::settings::getModule(); -// auto const& pomdpSettings = storm::settings::getModule(); -// auto const& ioSettings = storm::settings::getModule(); -// auto const &general = storm::settings::getModule(); -// auto const &debug = storm::settings::getModule(); -// auto const& pomdpQualSettings = storm::settings::getModule(); -// -// if (general.isVerboseSet()) { -// storm::utility::setLogLevel(l3pp::LogLevel::INFO); -// } -// if (debug.isDebugSet()) { -// storm::utility::setLogLevel(l3pp::LogLevel::DEBUG); -// } -// if (debug.isTraceSet()) { -// storm::utility::setLogLevel(l3pp::LogLevel::TRACE); -// } -// if (debug.isLogfileSet()) { -// storm::utility::initializeFileLogging(); -// } -// -// // For several engines, no model building step is performed, but the verification is started right away. -// storm::settings::modules::CoreSettings::Engine engine = coreSettings.getEngine(); -// -// storm::cli::SymbolicInput symbolicInput = storm::cli::parseAndPreprocessSymbolicInput(); -// // We should not export here if we are going to do some processing first. -// auto model = storm::cli::buildPreprocessExportModelWithValueTypeAndDdlib(symbolicInput, engine); -// STORM_LOG_THROW(model && model->getType() == storm::models::ModelType::Pomdp, storm::exceptions::WrongFormatException, "Expected a POMDP."); -// std::shared_ptr> pomdp = model->template as>(); -// storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); -// pomdp = makeCanonic.transform(); -// -// std::shared_ptr formula; -// if (!symbolicInput.properties.empty()) { -// formula = symbolicInput.properties.front().getRawFormula(); -// STORM_PRINT_AND_LOG("Analyzing property '" << *formula << "'" << std::endl); -// STORM_LOG_WARN_COND(symbolicInput.properties.size() == 1, "There is currently no support for multiple properties. All other properties will be ignored."); -// } -// -// if (pomdpSettings.isAnalyzeUniqueObservationsSet()) { -// STORM_PRINT_AND_LOG("Analyzing states with unique observation ..." 
<< std::endl); -// storm::analysis::UniqueObservationStates uniqueAnalysis(*pomdp); -// std::cout << uniqueAnalysis.analyse() << std::endl; -// } -// -// if (formula) { -// storm::logic::ProbabilityOperatorFormula const &probFormula = formula->asProbabilityOperatorFormula(); -// storm::logic::Formula const &subformula1 = probFormula.getSubformula(); -// -// -// if (formula->isProbabilityOperatorFormula()) { -// boost::optional prob1States; -// boost::optional prob0States; -// if (pomdpSettings.isSelfloopReductionSet() && !storm::solver::minimize(formula->asProbabilityOperatorFormula().getOptimalityType())) { -// STORM_PRINT_AND_LOG("Eliminating self-loop choices ..."); -// uint64_t oldChoiceCount = pomdp->getNumberOfChoices(); -// storm::transformer::GlobalPOMDPSelfLoopEliminator selfLoopEliminator(*pomdp); -// pomdp = selfLoopEliminator.transform(); -// STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl); -//======= #include "storm/api/storm.h" #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" @@ -352,13 +182,16 @@ namespace storm { if (pomdpSettings.isGridApproximationSet()) { STORM_PRINT_AND_LOG("Applying grid approximation... "); auto const& gridSettings = storm::settings::getModule(); - typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker::Options options; + typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker>::Options options; options.initialGridResolution = gridSettings.getGridResolution(); options.explorationThreshold = storm::utility::convertNumber(gridSettings.getExplorationThreshold()); options.doRefinement = gridSettings.isRefineSet(); options.refinementPrecision = storm::utility::convertNumber(gridSettings.getRefinementPrecision()); options.numericPrecision = storm::utility::convertNumber(gridSettings.getNumericPrecision()); options.cacheSubsimplices = gridSettings.isCacheSimplicesSet(); + if (gridSettings.isUnfoldBeliefMdpSizeThresholdSet()) { + options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold(); + } if (storm::NumberTraits::IsExact) { if (gridSettings.isNumericPrecisionSetFromDefault()) { STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmetic is used."); @@ -367,20 +200,16 @@ namespace storm { STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "A non-zero numeric precision was set although exact arithmetic is used. Results might be inexact."); } } - storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(*pomdp, options); - std::unique_ptr> result = checker.check(formula); + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker> checker(*pomdp, options); + auto result = checker.check(formula); checker.printStatisticsToStream(std::cout); - if (result) { - if (storm::utility::resources::isTerminate()) { - STORM_PRINT_AND_LOG("\nResult till abort: ") - } else { - STORM_PRINT_AND_LOG("\nResult: ") - } - printResult(result->underApproxValue, result->overApproxValue); - STORM_PRINT_AND_LOG(std::endl); + if (storm::utility::resources::isTerminate()) { + STORM_PRINT_AND_LOG("\nResult till abort: ") } else { - STORM_PRINT_AND_LOG("\nResult: Not available."
<< std::endl); + STORM_PRINT_AND_LOG("\nResult: ") } + printResult(result.lowerBound, result.upperBound); + STORM_PRINT_AND_LOG(std::endl); analysisPerformed = true; } if (pomdpSettings.isMemlessSearchSet()) { @@ -498,9 +327,11 @@ namespace storm { STORM_LOG_THROW(model->getType() == storm::models::ModelType::Pomdp && model->isSparseModel(), storm::exceptions::WrongFormatException, "Expected a POMDP in sparse representation."); std::shared_ptr> pomdp = model->template as>(); - storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); - pomdp = makeCanonic.transform(); - + if (!pomdpSettings.isNoCanonicSet()) { + storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); + pomdp = makeCanonic.transform(); + } + std::shared_ptr formula; if (!symbolicInput.properties.empty()) { formula = symbolicInput.properties.front().getRawFormula(); diff --git a/src/storm-pomdp/analysis/QualitativeAnalysis.cpp b/src/storm-pomdp/analysis/QualitativeAnalysis.cpp index a8ef930fc..177367ddc 100644 --- a/src/storm-pomdp/analysis/QualitativeAnalysis.cpp +++ b/src/storm-pomdp/analysis/QualitativeAnalysis.cpp @@ -86,81 +86,9 @@ namespace storm { storm::storage::BitVector QualitativeAnalysis::analyseProb1Max(storm::logic::UntilFormula const& formula) const { // We consider the states that satisfy the formula with prob.1 under arbitrary schedulers as goal states. storm::storage::BitVector goalStates = storm::utility::graph::performProb1A(pomdp.getTransitionMatrix(), pomdp.getTransitionMatrix().getRowGroupIndices(), pomdp.getBackwardTransitions(), checkPropositionalFormula(formula.getLeftSubformula()), checkPropositionalFormula(formula.getRightSubformula())); - + STORM_LOG_TRACE("Prob1A states according to MDP: " << goalStates); // Now find a set of observations such that there is a memoryless scheduler inducing prob. 1 for each state whose observation is in the set. 
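+ // A note on soundness (our reading of this change, not stated in the patch): performProb1A computes the states that reach the goal with probability 1 under every scheduler of the underlying MDP. + // Since an observation-based memoryless scheduler is one particular way of resolving the nondeterminism, these states also have probability 1 in the POMDP, so returning them directly is sound. + // The fixpoint computation below merely tried to enlarge this set in an observation-consistent way.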
- storm::storage::BitVector candidateStates = goalStates | checkPropositionalFormula(formula.getLeftSubformula()); - storm::storage::BitVector candidateActions = pomdp.getTransitionMatrix().getRowFilter(candidateStates); - storm::storage::BitVector candidateObservations(pomdp.getNrObservations(), true); - - - bool converged = false; - while (!converged) { - converged = true; - - // Get the candidate states that can reach the goal with prob1 via candidate actions - storm::storage::BitVector newCandidates; - if (candidateActions.full()) { - newCandidates = storm::utility::graph::performProb1E(pomdp.getTransitionMatrix(), pomdp.getTransitionMatrix().getRowGroupIndices(), pomdp.getBackwardTransitions(), candidateStates, goalStates); - } else { - storm::storage::SparseMatrix filteredTransitions(pomdp.getTransitionMatrix().filterEntries(candidateActions)); - newCandidates = storm::utility::graph::performProb1E(filteredTransitions, filteredTransitions.getRowGroupIndices(), filteredTransitions.transpose(true), candidateStates, goalStates); - } - if (candidateStates != newCandidates) { - converged = false; - candidateStates = std::move(newCandidates); - } - - // Unselect all observations that have a non-candidate state - for (uint64_t state = candidateStates.getNextUnsetIndex(0); state < candidateStates.size(); state = candidateStates.getNextUnsetIndex(state + 1)) { - candidateObservations.set(pomdp.getObservation(state), false); - } - - // update the candidate actions to the set of actions that stay inside the candidate state set - std::vector candidateActionsPerObservation(pomdp.getNrObservations()); - for (auto const& state : candidateStates) { - auto& candidateActionsAtState = candidateActionsPerObservation[pomdp.getObservation(state)]; - if (candidateActionsAtState.size() == 0) { - candidateActionsAtState.resize(pomdp.getNumberOfChoices(state), true); - } - STORM_LOG_ASSERT(candidateActionsAtState.size() == pomdp.getNumberOfChoices(state), "State " + std::to_string(state) + " has " + std::to_string(pomdp.getNumberOfChoices(state)) + " actions, different from other with same observation (" + std::to_string(candidateActionsAtState.size()) + ")." ); - for (auto const& action : candidateActionsAtState) { - for (auto const& entry : pomdp.getTransitionMatrix().getRow(state, action)) { - if (!candidateStates.get(entry.getColumn())) { - candidateActionsAtState.set(action, false); - break; - } - } - } - } - - // Unselect all observations without such an action - for (auto const& o : candidateObservations) { - if (candidateActionsPerObservation[o].empty()) { - candidateObservations.set(o, false); - } - } - - // only keep the candidate states with a candidateObservation - for (auto const& state : candidateStates) { - if (!candidateObservations.get(pomdp.getObservation(state)) && !goalStates.get(state)) { - candidateStates.set(state, false); - converged = false; - } - } - - // Only keep the candidate actions originating from a candidateState. 
Also transform the representation of candidate actions - candidateActions.clear(); - for (auto const& state : candidateStates) { - uint64_t offset = pomdp.getTransitionMatrix().getRowGroupIndices()[state]; - for (auto const& action : candidateActionsPerObservation[pomdp.getObservation(state)]) { - candidateActions.set(offset + action); - } - } - } - - assert(goalStates.isSubsetOf(candidateStates)); - - return candidateStates; + return goalStates; } diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h new file mode 100644 index 000000000..e5d233aec --- /dev/null +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -0,0 +1,670 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "storm-parsers/api/properties.h" +#include "storm/api/properties.h" +#include "storm/api/verification.h" + +#include "storm/storage/BitVector.h" +#include "storm/storage/SparseMatrix.h" +#include "storm/utility/macros.h" +#include "storm-pomdp/storage/BeliefManager.h" +#include "storm/utility/SignalHandler.h" +#include "storm/modelchecker/results/CheckResult.h" +#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" + + +namespace storm { + namespace builder { + template + class BeliefMdpExplorer { + public: + typedef typename PomdpType::ValueType ValueType; + typedef storm::storage::BeliefManager BeliefManagerType; + typedef typename BeliefManagerType::BeliefId BeliefId; + typedef uint64_t MdpStateType; + + enum class Status { + Uninitialized, + Exploring, + ModelFinished, + ModelChecked + }; + + BeliefMdpExplorer(std::shared_ptr beliefManager, std::vector const& pomdpLowerValueBounds, std::vector const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds), status(Status::Uninitialized) { + // Intentionally left empty + } + BeliefMdpExplorer(BeliefMdpExplorer&& other) = default; + + BeliefManagerType const& getBeliefManager() const { + return *beliefManager; + } + + void startNewExploration(boost::optional extraTargetStateValue = boost::none, boost::optional extraBottomStateValue = boost::none) { + status = Status::Exploring; + // Reset data from potential previous explorations + mdpStateToBeliefIdMap.clear(); + beliefIdToMdpStateMap.clear(); + exploredBeliefIds.clear(); + exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); + mdpStatesToExplore.clear(); + lowerValueBounds.clear(); + upperValueBounds.clear(); + values.clear(); + exploredMdpTransitions.clear(); + exploredChoiceIndices.clear(); + mdpActionRewards.clear(); + exploredMdp = nullptr; + internalAddRowGroupIndex(); // Mark the start of the first row group + + // Add some states with special treatment (if requested) + if (extraBottomStateValue) { + currentMdpState = getCurrentNumberOfMdpStates(); + extraBottomState = currentMdpState; + mdpStateToBeliefIdMap.push_back(beliefManager->noId()); + insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get()); + + internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one()); + internalAddRowGroupIndex(); + } else { + extraBottomState = boost::none; + } + if (extraTargetStateValue) { + currentMdpState = getCurrentNumberOfMdpStates(); + extraTargetState = currentMdpState; + mdpStateToBeliefIdMap.push_back(beliefManager->noId()); + 
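// The extra bottom/target states intentionally map to beliefManager->noId(): they correspond to no belief and act as designated sink/goal states, e.g. as targets for transitions that are cut off during exploration. + // Their extra value is inserted as both lower and upper value hint, i.e. it is treated as exact. +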
insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get()); + + internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one()); + internalAddRowGroupIndex(); + + targetStates.grow(getCurrentNumberOfMdpStates(), false); + targetStates.set(extraTargetState.get(), true); + } else { + extraTargetState = boost::none; + } + currentMdpState = noState(); + + // Set up the initial state. + initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); + } + + /*! + * Restarts the exploration to allow re-exploring each state. + * After calling this, the "currently explored" MDP has the same number of states and choices as the "old" one, but the choices are still empty + * This method inserts the initial state of the MDP in the exploration queue. + * While re-exploring, the reference to the old MDP remains valid. + */ + void restartExploration() { + STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status."); + status = Status::Exploring; + // We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid. + exploredBeliefIds.clear(); + exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); + exploredMdpTransitions.clear(); + exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices()); + exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices(); + mdpActionRewards.clear(); + if (exploredMdp->hasRewardModel()) { + // Can be overwritten during exploration + mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector(); + } + targetStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); + truncatedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); + mdpStatesToExplore.clear(); + + // The extra states are not changed + if (extraBottomState) { + currentMdpState = extraBottomState.get(); + restoreOldBehaviorAtCurrentState(0); + } + if (extraTargetState) { + currentMdpState = extraTargetState.get(); + restoreOldBehaviorAtCurrentState(0); + targetStates.set(extraTargetState.get(), true); + } + currentMdpState = noState(); + + // Set up the initial state. + initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); + } + + bool hasUnexploredState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return !mdpStatesToExplore.empty(); + } + + BeliefId exploreNextState() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + // Mark the end of the previously explored row group. + if (currentMdpState != noState() && !currentStateHasOldBehavior()) { + internalAddRowGroupIndex(); + } + + // Pop from the queue. 
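+ // (mdpStatesToExplore is used as a FIFO queue: new states are pushed to the back and popped from the front, so beliefs are explored breadth-first, roughly in order of their distance from the initial belief.)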
+ currentMdpState = mdpStatesToExplore.front(); + mdpStatesToExplore.pop_front(); + + + return mdpStateToBeliefIdMap[currentMdpState]; + } + + void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero(), ValueType const& bottomStateValue = storm::utility::zero()) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; + if (!storm::utility::isZero(bottomStateValue)) { + STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); + internalAddTransition(row, extraBottomState.get(), bottomStateValue); + } + if (!storm::utility::isZero(targetStateValue)) { + STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none."); + internalAddTransition(row, extraTargetState.get(), targetStateValue); + } + } + + void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one()) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; + internalAddTransition(row, getCurrentMdpState(), value); + } + + /*! + * Adds the next transition to the given successor belief + * @param localActionIndex + * @param transitionTarget + * @param value + * @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition. + * @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true. 
+ */ + bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); + + MdpStateType column; + if (ignoreNewBeliefs) { + column = getExploredMdpState(transitionTarget); + if (column == noState()) { + return false; + } + } else { + column = getOrAddMdpState(transitionTarget); + } + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; + internalAddTransition(row, column, value); + return true; + } + + void computeRewardAtCurrentState(uint64_t const& localActionIndex, ValueType extraReward = storm::utility::zero()) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) { + mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); + } + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; + mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward; + } + + void setCurrentStateIsTarget() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + targetStates.grow(getCurrentNumberOfMdpStates(), false); + targetStates.set(getCurrentMdpState(), true); + } + + void setCurrentStateIsTruncated() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + truncatedStates.grow(getCurrentNumberOfMdpStates(), false); + truncatedStates.set(getCurrentMdpState(), true); + } + + bool currentStateHasOldBehavior() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateHasOldBehavior' called but there is no current state."); + return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); + } + + /*! + * Inserts transitions and rewards at the given action as in the MDP of the previous exploration. + * Does NOT set whether the state is truncated and/or target.
+ * Will add "old" states that have not been considered before into the exploration queue + * @param localActionIndex + */ + void restoreOldBehaviorAtCurrentState(uint64_t const& localActionIndex) { + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any."); + STORM_LOG_ASSERT(localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); + + uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex; + STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index."); + + // Insert the transitions + for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) { + internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue()); + // Check whether exploration is needed + auto beliefId = getBeliefId(transition.getColumn()); + if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state + if (!exploredBeliefIds.get(beliefId)) { + // This belief needs exploration + exploredBeliefIds.set(beliefId, true); + mdpStatesToExplore.push_back(transition.getColumn()); + } + } + } + + // Actually, nothing needs to be done for rewards since we already initialize the vector with the "old" values + } + + void finishExploration() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); + + // Complete the exploration + // Finish the last row grouping in case the last explored state was new + if (!currentStateHasOldBehavior()) { + internalAddRowGroupIndex(); + } + // Resize state- and choice based vectors to the correct size + targetStates.resize(getCurrentNumberOfMdpStates(), false); + truncatedStates.resize(getCurrentNumberOfMdpStates(), false); + if (!mdpActionRewards.empty()) { + mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); + } + + // We are not exploring anymore + currentMdpState = noState(); + + // If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build). 
+ // We get rid of these before rebuilding the model + if (exploredMdp) { + dropUnexploredStates(); + } + + // Create the transition matrix + uint64_t entryCount = 0; + for (auto const& row : exploredMdpTransitions) { + entryCount += row.size(); + } + storm::storage::SparseMatrixBuilder builder(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), entryCount, true, true, getCurrentNumberOfMdpStates()); + for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { + uint64_t rowIndex = exploredChoiceIndices[groupIndex]; + uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; + builder.newRowGroup(rowIndex); + for (; rowIndex < groupEnd; ++rowIndex) { + for (auto const& entry : exploredMdpTransitions[rowIndex]) { + builder.addNextValue(rowIndex, entry.first, entry.second); + } + } + } + auto mdpTransitionMatrix = builder.build(); + + // Create a standard labeling + storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates()); + mdpLabeling.addLabel("init"); + mdpLabeling.addLabelToState("init", initialMdpState); + targetStates.resize(getCurrentNumberOfMdpStates(), false); + mdpLabeling.addLabel("target", std::move(targetStates)); + truncatedStates.resize(getCurrentNumberOfMdpStates(), false); + mdpLabeling.addLabel("truncated", std::move(truncatedStates)); + + // Create a standard reward model (if rewards are available) + std::unordered_map> mdpRewardModels; + if (!mdpActionRewards.empty()) { + mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); + mdpRewardModels.emplace("default", + storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(mdpActionRewards))); + } + + storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); + exploredMdp = std::make_shared>(std::move(modelComponents)); + status = Status::ModelFinished; + STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << " of which were flagged as truncated)."); + + } + + void dropUnexploredStates() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); + + STORM_LOG_ASSERT(exploredMdp, "Method called although no 'old' MDP is available."); + // Find the states (and corresponding choices) that were not explored. + // These correspond to "empty" MDP transitions + storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(), true), relevantMdpChoices(getCurrentNumberOfMdpChoices(), true); + std::vector toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState()); + MdpStateType nextRelevantIndex = 0; + for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { + uint64_t rowIndex = exploredChoiceIndices[groupIndex]; + // Check first row in group + if (exploredMdpTransitions[rowIndex].empty()) { + relevantMdpChoices.set(rowIndex, false); + relevantMdpStates.set(groupIndex, false); + } else { + toRelevantStateIndexMap[groupIndex] = nextRelevantIndex; + ++nextRelevantIndex; + } + uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; + // process remaining rows in group + for (++rowIndex; rowIndex < groupEnd; ++rowIndex) { + // Assert that all actions at the current state were consistently explored or unexplored.
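+ // (A mixed group cannot occur in a correct run: exploring a state always fills all rows of its group, so either every action of a state was re-explored or none was.)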
+ STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex), "Actions at 'old' MDP state " << groupIndex << " were only partly explored."); + if (exploredMdpTransitions[rowIndex].empty()) { + relevantMdpChoices.set(rowIndex, false); + } + } + } + + if (relevantMdpStates.full()) { + // All states are relevant so nothing to do + return; + } + + // Translate various components to the "new" MDP state set + storm::utility::vector::filterVectorInPlace(mdpStateToBeliefIdMap, relevantMdpStates); + { // beliefIdToMdpStateMap + for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) { + if (relevantMdpStates.get(belIdToMdpStateIt->second)) { + // Translate current entry and move on to the next one. + belIdToMdpStateIt->second = toRelevantStateIndexMap[belIdToMdpStateIt->second]; + ++belIdToMdpStateIt; + } else { + STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first), "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId"); + // Delete current entry and move on to the next one. + // This works because std::map::erase does not invalidate other iterators within the map! + beliefIdToMdpStateMap.erase(belIdToMdpStateIt++); + } + } + } + { // exploredMdpTransitions + storm::utility::vector::filterVectorInPlace(exploredMdpTransitions, relevantMdpChoices); + // Adjust column indices. Unfortunately, the fastest way seems to be to "rebuild" the map + // It might pay off to do this when building the matrix. + for (auto& transitions : exploredMdpTransitions) { + std::map newTransitions; + for (auto const& entry : transitions) { + STORM_LOG_ASSERT(relevantMdpStates.get(entry.first), "Relevant state has transition to irrelevant state."); + newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second); + } + transitions = std::move(newTransitions); + } + } + { // exploredChoiceIndices + MdpStateType newState = 0; + assert(exploredChoiceIndices[0] == 0u); + // Loop invariant: all indices up to exploredChoiceIndices[newState] consider the new row indices and all other entries are not touched.
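+ // Hypothetical example: with exploredChoiceIndices = [0, 2, 5, 6] and relevantMdpStates = {0, 2}, state 0 keeps its index, state 2 (group size 1) becomes new state 1, and the vector is compacted in place to [0, 2, 3].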
+ for (auto const& oldState : relevantMdpStates) { + if (oldState != newState) { + assert(oldState > newState); + uint64_t groupSize = exploredChoiceIndices[oldState + 1] - exploredChoiceIndices[oldState]; + exploredChoiceIndices[newState + 1] = exploredChoiceIndices[newState] + groupSize; + } + ++newState; + } + exploredChoiceIndices.resize(newState + 1); + } + if (!mdpActionRewards.empty()) { + storm::utility::vector::filterVectorInPlace(mdpActionRewards, relevantMdpChoices); + } + if (extraBottomState) { + extraBottomState = toRelevantStateIndexMap[extraBottomState.get()]; + } + if (extraTargetState) { + extraTargetState = toRelevantStateIndexMap[extraTargetState.get()]; + } + targetStates = targetStates % relevantMdpStates; + truncatedStates = truncatedStates % relevantMdpStates; + initialMdpState = toRelevantStateIndexMap[initialMdpState]; + + storm::utility::vector::filterVectorInPlace(lowerValueBounds, relevantMdpStates); + storm::utility::vector::filterVectorInPlace(upperValueBounds, relevantMdpStates); + storm::utility::vector::filterVectorInPlace(values, relevantMdpStates); + + } + + std::shared_ptr> getExploredMdp() const { + STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); + STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); + return exploredMdp; + } + + MdpStateType getCurrentNumberOfMdpStates() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return mdpStateToBeliefIdMap.size(); + } + + MdpStateType getCurrentNumberOfMdpChoices() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return exploredMdpTransitions.size(); + } + + MdpStateType getStartOfCurrentRowGroup() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return exploredChoiceIndices[getCurrentMdpState()]; + } + + ValueType getLowerValueBoundAtCurrentState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return lowerValueBounds[getCurrentMdpState()]; + } + + ValueType getUpperValueBoundAtCurrentState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return upperValueBounds[getCurrentMdpState()]; + } + + ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const { + return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds); + } + + ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const { + return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds); + } + + void computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) { + STORM_LOG_ASSERT(status == Status::ModelFinished, "Method call is invalid in current status."); + STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored"); + auto property = createStandardProperty(dir, exploredMdp->hasRewardModel()); + auto task = createStandardCheckTask(property); + + std::unique_ptr res(storm::api::verifyWithSparseEngine(exploredMdp, task)); + if (res) { + values = std::move(res->asExplicitQuantitativeCheckResult().getValueVector()); + STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(lowerValueBounds, values, std::less_equal()), "Computed values are smaller than the lower bound."); + 
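// (Debug-mode sanity checks: the computed values should lie inside the per-state interval [lowerValueBounds, upperValueBounds] that seeded the solver hints.) +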
STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(upperValueBounds, values, std::greater_equal()), "Computed values are larger than the upper bound."); + } else { + STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!"); + STORM_LOG_ERROR("No result obtained while checking."); + } + status = Status::ModelChecked; + } + + bool hasComputedValues() const { + return status == Status::ModelChecked; + } + + std::vector const& getValuesOfExploredMdp() const { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + return values; + } + + ValueType const& getComputedValueAtInitialState() const { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); + STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored."); + return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)]; + } + + MdpStateType getBeliefId(MdpStateType exploredMdpState) const { + STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); + return mdpStateToBeliefIdMap[exploredMdpState]; + } + + struct SuccessorObservationInformation { + SuccessorObservationInformation(ValueType const& obsProb, ValueType const& maxProb, uint64_t const& count) : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) { + // Intentionally left empty. + } + + void join(SuccessorObservationInformation other) { + observationProbability += other.observationProbability; + maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs); + successorWithObsCount += other.successorWithObsCount; + } + + ValueType observationProbability; /// The probability we move to the corresponding observation. + ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation. 
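+ /// Note: join() merges two entries for the same observation by summing the probabilities and counts and keeping the larger maximum, so (hypothetical numbers) joining (0.2, 0.2, 1) with (0.3, 0.3, 1) yields (0.5, 0.3, 2).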
+ uint64_t successorWithObsCount; /// The number of successors with this observation. + }; + + void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map& gatheredSuccessorObservations) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); + uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; + gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + } + + void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map& gatheredSuccessorObservations) { + STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before"); + for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { + auto const& beliefId = getBeliefId(entry.getColumn()); + if (beliefId != beliefManager->noId()) { + auto const& obs = beliefManager->getBeliefObservation(beliefId); + SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1); + auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info); + if (!obsInsertion.second) { + // There already is an entry for this observation, so join the two entries + obsInsertion.first->second.join(info); + } + } + } + } + + + private: + MdpStateType noState() const { + return std::numeric_limits::max(); + } + + std::shared_ptr createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) { + std::string propertyString = computeRewards ? "R" : "P"; + propertyString += storm::solver::minimize(dir) ? "min" : "max"; + propertyString += "=? [F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + return storm::api::extractFormulasFromProperties(propertyVector).front(); + } + + storm::modelchecker::CheckTask createStandardCheckTask(std::shared_ptr& property) { + //Note: The property should not run out of scope after calling this because the task only stores the property by reference.
+ // Therefore, this method needs the property by reference (and not const reference) + auto task = storm::api::createTask(property, false); + auto hint = storm::modelchecker::ExplicitModelCheckerHint(); + hint.setResultHint(values); + auto hintPtr = std::make_shared>(hint); + task.setHint(hintPtr); + return task; + } + + MdpStateType getCurrentMdpState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return currentMdpState; + } + + MdpStateType getCurrentBeliefId() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return getBeliefId(getCurrentMdpState()); + } + + void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) { + STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(), "Skipped at least one row."); + if (row == exploredMdpTransitions.size()) { + exploredMdpTransitions.emplace_back(); + } + STORM_LOG_ASSERT(exploredMdpTransitions[row].count(column) == 0, "Trying to insert multiple transitions to the same state."); + exploredMdpTransitions[row][column] = value; + } + + void internalAddRowGroupIndex() { + exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices()); + } + + MdpStateType getExploredMdpState(BeliefId const& beliefId) const { + if (beliefId < exploredBeliefIds.size() && exploredBeliefIds.get(beliefId)) { + return beliefIdToMdpStateMap.at(beliefId); + } else { + return noState(); + } + } + + void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) { + lowerValueBounds.push_back(lowerBound); + upperValueBounds.push_back(upperBound); + // Take the middle value as a hint + values.push_back((lowerBound + upperBound) / storm::utility::convertNumber(2)); + STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have different size than the number of available states."); + STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size."); + } + + MdpStateType getOrAddMdpState(BeliefId const& beliefId) { + exploredBeliefIds.grow(beliefId + 1, false); + if (exploredBeliefIds.get(beliefId)) { + return beliefIdToMdpStateMap[beliefId]; + } else { + // This state needs exploration + exploredBeliefIds.set(beliefId, true); + + // If this is a restart of the exploration, we still might have an MDP state for the belief + if (exploredMdp) { + auto findRes = beliefIdToMdpStateMap.find(beliefId); + if (findRes != beliefIdToMdpStateMap.end()) { + mdpStatesToExplore.push_back(findRes->second); + return findRes->second; + } + } + // At this point we need to add a new MDP state + MdpStateType result = getCurrentNumberOfMdpStates(); + assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size()); + mdpStateToBeliefIdMap.push_back(beliefId); + beliefIdToMdpStateMap[beliefId] = result; + insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId)); + mdpStatesToExplore.push_back(result); + return result; + } + } + + // Belief state related information + std::shared_ptr beliefManager; + std::vector mdpStateToBeliefIdMap; + std::map beliefIdToMdpStateMap; + storm::storage::BitVector exploredBeliefIds; + + // Exploration information + std::deque mdpStatesToExplore; + std::vector> exploredMdpTransitions; + std::vector exploredChoiceIndices; + std::vector mdpActionRewards; + uint64_t currentMdpState; + + // Special states during exploration + boost::optional
extraTargetState; + boost::optional extraBottomState; + storm::storage::BitVector targetStates; + storm::storage::BitVector truncatedStates; + MdpStateType initialMdpState; + + // Final Mdp + std::shared_ptr> exploredMdp; + + // Value related information + std::vector const& pomdpLowerValueBounds; + std::vector const& pomdpUpperValueBounds; + std::vector lowerValueBounds; + std::vector upperValueBounds; + std::vector values; // Contains an estimate during building and the actual result after a check has performed + + // The current status of this explorer + Status status; + }; + } +} \ No newline at end of file diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index fb5264ca8..622e63512 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -15,14 +15,10 @@ #include "storm/models/sparse/StandardRewardModel.h" #include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h" #include "storm/utility/vector.h" -#include "storm/modelchecker/results/CheckResult.h" -#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" -#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" -#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" #include "storm/api/properties.h" #include "storm/api/export.h" -#include "storm-parsers/api/storm-parsers.h" - +#include "storm-pomdp/builder/BeliefMdpExplorer.h" +#include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h" #include "storm/utility/macros.h" #include "storm/utility/SignalHandler.h" @@ -31,66 +27,88 @@ namespace storm { namespace pomdp { namespace modelchecker { - template - ApproximatePOMDPModelchecker::Options::Options() { + template + ApproximatePOMDPModelchecker::Options::Options() { initialGridResolution = 10; explorationThreshold = storm::utility::zero(); doRefinement = true; refinementPrecision = storm::utility::convertNumber(1e-4); numericPrecision = storm::NumberTraits::IsExact ? 
storm::utility::zero() : storm::utility::convertNumber(1e-9); cacheSubsimplices = false; + beliefMdpSizeThreshold = boost::none; + } + + template + ApproximatePOMDPModelchecker::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) { + // Intentionally left empty + } + + template + typename ApproximatePOMDPModelchecker::ValueType + ApproximatePOMDPModelchecker::Result::diff(bool relative) const { + ValueType diff = upperBound - lowerBound; + if (diff < storm::utility::zero()) { + STORM_LOG_WARN_COND(diff >= 1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << "."); + diff = storm::utility::zero(); + } + if (relative && !storm::utility::isZero(upperBound)) { + diff /= upperBound; + } + return diff; } - template - ApproximatePOMDPModelchecker::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { + + template + ApproximatePOMDPModelchecker::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { // intentionally left empty; } - template - ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options) : pomdp(pomdp), options(options) { + template + ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) { cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(this->options.numericPrecision), false); } - template - std::unique_ptr> ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { + template + typename ApproximatePOMDPModelchecker::Result ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { // Reset all collected statistics statistics = Statistics(); - std::unique_ptr> result; // Extract the relevant information from the formula auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); - if (formulaInfo.isNonNestedReachabilityProbability()) { - // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. - STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); - if (!formulaInfo.getSinkStates().empty()) { - auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states); - reachableFromSinkStates &= ~formulaInfo.getSinkStates().states; - STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. 
This is currently not supported"); - } - if (options.doRefinement) { - result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false); - } else { - result = computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); - } - } else if (formulaInfo.isNonNestedExpectedRewardFormula()) { + + // Compute some initial bounds on the values for each state of the pomdp + auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker>(pomdp).getValueBounds(formula, formulaInfo); + Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]); + + boost::optional rewardModelName; + if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); - if (options.doRefinement) { - result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true); + if (formulaInfo.isNonNestedReachabilityProbability()) { + if (!formulaInfo.getSinkStates().empty()) { + auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states); + reachableFromSinkStates &= ~formulaInfo.getSinkStates().states; + STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported"); + } } else { - // FIXME: pick the non-unique reward model here - STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet."); - result = computeReachabilityRewardOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + // Expected reward formula! 
+ rewardModelName = formulaInfo.getRewardModelName(); } } else { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); } + + if (options.doRefinement) { + refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); + } else { + computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); + } if (storm::utility::resources::isTerminate()) { statistics.aborted = true; } return result; } - template - void ApproximatePOMDPModelchecker::printStatisticsToStream(std::ostream& stream) const { + template + void ApproximatePOMDPModelchecker::printStatisticsToStream(std::ostream& stream) const { stream << "##### Grid Approximation Statistics ######" << std::endl; stream << "# Input model: " << std::endl; pomdp.printModelInformationToStream(stream); @@ -116,6 +134,7 @@ namespace storm { stream << ">="; } stream << statistics.overApproximationStates.get() << std::endl; + stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.get() << std::endl; stream << "# Time spend for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << std::endl; stream << "# Time spend for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << std::endl; } @@ -131,670 +150,270 @@ namespace storm { stream << ">="; } stream << statistics.underApproximationStates.get() << std::endl; + stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.get() << std::endl; stream << "# Time spend for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << std::endl; stream << "# Time spend for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << std::endl; } stream << "##########################################" << std::endl; } + - template - std::unique_ptr> - ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, bool computeRewards) { - std::srand(time(NULL)); - // Compute easy upper and lower bounds - storm::utility::Stopwatch underlyingWatch(true); - // Compute the results on the underlying MDP as a basic overapproximation - storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling()); - // TODO: Is the following really necessary - underlyingMdpLabeling.addLabel("__goal__"); - std::vector goalStates; - for (auto const &targetObs : targetObservations) { - for (auto const &goalState : pomdp.getStatesWithObservation(targetObs)) { - underlyingMdpLabeling.addLabelToState("__goal__", goalState); - } - } - storm::models::sparse::Mdp underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels()); - auto underlyingModel = std::static_pointer_cast>( - std::make_shared>(underlyingMdp)); - std::string initPropString = computeRewards ? "R" : "P"; - initPropString += min ? "min" : "max"; - initPropString += "=? 
[F \"__goal__\"]"; - std::vector propVector = storm::api::parseProperties(initPropString); - std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); - STORM_PRINT("Underlying MDP" << std::endl) - if (computeRewards) { - underlyingMdp.addRewardModel("std", pomdp.getUniqueRewardModel()); + template + void ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { + + if (options.explorationThreshold > storm::utility::zero()) { + STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } - underlyingMdp.printModelInformationToStream(std::cout); - std::unique_ptr underlyingRes( - storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); - STORM_LOG_ASSERT(underlyingRes, "Result not exist."); - underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); - auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); - underlyingWatch.stop(); - - storm::utility::Stopwatch positionalWatch(true); - // we define some positional scheduler for the POMDP as a basic lower bound - storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); - for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { - auto obsStates = pomdp.getStatesWithObservation(obs); - // select a random action for all states with the same observation - uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front()); - for (auto const &state : obsStates) { - pomdpScheduler.setChoice(chosenAction, state); + + uint64_t underApproxSizeThreshold = 0; + { // Overapproximation + std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); + auto manager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + manager->setRewardModel(rewardModelName); + } + auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, manager, approx); + if (approx->hasComputedValues()) { + STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n"); + approx->getExploredMdp()->printModelInformationToStream(std::cout); + ValueType& resultValue = min ? 
result.lowerBound : result.upperBound; + resultValue = approx->getComputedValueAtInitialState(); + underApproxSizeThreshold = std::max(approx->getExploredMdp()->getNumberOfStates(), underApproxSizeThreshold); + } + } + { // Underapproximation (Uses a fresh Belief manager) + auto manager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + manager->setRewardModel(rewardModelName); + } + auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); + if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0) { + underApproxSizeThreshold = options.beliefMdpSizeThreshold.get(); + } + if (underApproxSizeThreshold == 0) { + underApproxSizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); // Heuristically select this (only relevant if the over-approx could not be build) + } + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx); + if (approx->hasComputedValues()) { + STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); + approx->getExploredMdp()->printModelInformationToStream(std::cout); + ValueType& resultValue = min ? result.upperBound : result.lowerBound; + resultValue = approx->getComputedValueAtInitialState(); } } - auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false); - if (computeRewards) { - underApproxModel->restrictRewardModels({"std"}); - } - STORM_PRINT("Random Positional Scheduler" << std::endl) - underApproxModel->printModelInformationToStream(std::cout); - std::unique_ptr underapproxRes( - storm::api::verifyWithSparseEngine(underApproxModel, storm::api::createTask(underlyingProperty, false))); - STORM_LOG_ASSERT(underapproxRes, "Result not exist."); - underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true))); - auto initialUnderApproxMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); - positionalWatch.stop(); - - STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // " - << initialUnderApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl) - STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl) - - // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution. - // This can probably be improved (i.e. 
resolutions for single belief states) - STORM_PRINT("Initial Resolution: " << options.initialGridResolution << std::endl) + } + + template + void ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { + + // Set up exploration data std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - std::set changedObservations; - uint64_t underApproxModelSize = 200; - uint64_t refinementCounter = 1; - STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl) - std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, - initialOverApproxMap, - initialUnderApproxMap, underApproxModelSize); - if (res == nullptr) { - statistics.refinementSteps = 0; - return nullptr; + auto overApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); + auto underApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + overApproxBeliefManager->setRewardModel(rewardModelName); + underApproxBeliefManager->setRewardModel(rewardModelName); + } + + // OverApproximaion + auto overApproximation = std::make_shared(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (!overApproximation->hasComputedValues()) { + return; + } + ValueType& overApproxValue = min ? result.lowerBound : result.upperBound; + overApproxValue = overApproximation->getComputedValueAtInitialState(); + + // UnderApproximation + uint64_t underApproxSizeThreshold; + if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0ull) { + underApproxSizeThreshold = options.beliefMdpSizeThreshold.get(); + } else { + underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates(); + } + auto underApproximation = std::make_shared(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); + if (!underApproximation->hasComputedValues()) { + return; } - ValueType lastMinScore = storm::utility::infinity(); - while (refinementCounter < 1000 && ((!min && res->overApproxValue - res->underApproxValue > options.refinementPrecision) || - (min && res->underApproxValue - res->overApproxValue > options.refinementPrecision))) { + ValueType& underApproxValue = min ? 
result.upperBound : result.lowerBound; + underApproxValue = underApproximation->getComputedValueAtInitialState(); + + // ValueType lastMinScore = storm::utility::infinity(); + // Start refinement + statistics.refinementSteps = 0; + ValueType refinementAggressiveness = storm::utility::convertNumber(0.0); + while (result.diff() > options.refinementPrecision) { if (storm::utility::resources::isTerminate()) { break; } - // TODO the actual refinement - // choose which observation(s) to refine - std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); - std::vector beliefCount(pomdp.getNrObservations(), 0); - bsmap_type::right_map::const_iterator underApproxStateBeliefIter = res->underApproxBeliefStateMap.right.begin(); - while (underApproxStateBeliefIter != res->underApproxBeliefStateMap.right.end()) { - auto currentBelief = res->beliefList[underApproxStateBeliefIter->second]; - beliefCount[currentBelief.observation] += 1; - bsmap_type::left_const_iterator overApproxBeliefStateIter = res->overApproxBeliefStateMap.left.find(underApproxStateBeliefIter->second); - if (overApproxBeliefStateIter != res->overApproxBeliefStateMap.left.end()) { - // If there is an over-approximate value for the belief, use it - auto diff = res->overApproxMap[overApproxBeliefStateIter->second] - res->underApproxMap[underApproxStateBeliefIter->first]; - obsAccumulator[currentBelief.observation] += diff; - } else { - //otherwise, we approximate a value TODO this is critical, we have to think about it - auto overApproxValue = storm::utility::zero(); - auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation], pomdp.getNumberOfStates()); - auto subSimplex = temp.first; - auto lambdas = temp.second; - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - uint64_t approxId = getBeliefIdInVector(res->beliefList, currentBelief.observation, subSimplex[j]); - bsmap_type::left_const_iterator approxIter = res->overApproxBeliefStateMap.left.find(approxId); - if (approxIter != res->overApproxBeliefStateMap.left.end()) { - overApproxValue += lambdas[j] * res->overApproxMap[approxIter->second]; - } else { - overApproxValue += lambdas[j]; - } - } - } - obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[underApproxStateBeliefIter->first]; - } - ++underApproxStateBeliefIter; - } - - - /*for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { - obsAccumulator[i] /= storm::utility::convertNumber(beliefCount[i]); - }*/ - changedObservations.clear(); - - //TODO think about some other scoring methods - auto maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end()); - //if (cc.isEqual(maxAvgDifference, lastMinScore) || cc.isLess(lastMinScore, maxAvgDifference)) { - lastMinScore = maxAvgDifference; - auto maxRes = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); - STORM_PRINT("Set all to " << maxRes + 1 << std::endl) - for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { - observationResolutionVector[i] = maxRes + 1; - changedObservations.insert(i); + ++statistics.refinementSteps.get(); + STORM_LOG_INFO("Starting refinement step " << statistics.refinementSteps.get() << ". 
Current difference between lower and upper bound is " << result.diff() << "."); + + // Refine over-approximation + STORM_LOG_DEBUG("Refining over-approximation with aggressiveness " << refinementAggressiveness << "."); + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, &refinementAggressiveness, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (overApproximation->hasComputedValues()) { + overApproxValue = overApproximation->getComputedValueAtInitialState(); + } else { + break; } - /*} else { - lastMinScore = std::min(maxAvgDifference, lastMinScore); - STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) - STORM_PRINT("Last Min Score: " << lastMinScore << std::endl) - //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl) - for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { - //STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i]) - if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) { - //STORM_PRINT(" *** ") - observationResolutionVector[i] += 1; - changedObservations.insert(i); - } - //STORM_PRINT(std::endl) + + if (result.diff() > options.refinementPrecision) { + // Refine under-approximation + underApproxSizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + refinementAggressiveness)); + underApproxSizeThreshold = std::max(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates()); + STORM_LOG_DEBUG("Refining under-approximation with size threshold " << underApproxSizeThreshold << "."); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); + if (underApproximation->hasComputedValues()) { + underApproxValue = underApproximation->getComputedValueAtInitialState(); + } else { + break; } - }*/ - if (underApproxModelSize < std::numeric_limits::max() - 101) { - underApproxModelSize += 100; } - STORM_PRINT( - "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl) - res = computeRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, - res, changedObservations, initialOverApproxMap, initialUnderApproxMap, underApproxModelSize); - //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot"); - STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) || - cc.isEqual(res->underApproxValue, res->overApproxValue), - "The value for the under-approximation is larger than the value for the over-approximation."); - ++refinementCounter; - } - statistics.refinementSteps = refinementCounter; - if (min) { - return std::make_unique>(POMDPCheckResult{res->underApproxValue, res->overApproxValue}); - } else { - return std::make_unique>(POMDPCheckResult{res->overApproxValue, res->underApproxValue}); } } - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, - bool computeRewards, - boost::optional> overApproximationMap, - boost::optional> underApproximationMap, - uint64_t maxUaModelSize) { - STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - auto result 
= computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, overApproximationMap, - underApproximationMap, maxUaModelSize); - if (result == nullptr) { - return nullptr; - } - if (min) { - return std::make_unique>(POMDPCheckResult{result->underApproxValue, result->overApproxValue}); - } else { - return std::make_unique>(POMDPCheckResult{result->overApproxValue, result->underApproxValue}); - } + /*! + * Heuristically rates the quality of the approximation described by the given successor observation info. + * Here, 0 means a bad approximation and 1 means a good approximation. + */ + template + typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) { + auto n = storm::utility::convertNumber(info.successorWithObsCount); + auto one = storm::utility::one(); + + // Create the rating for this observation at this choice from the given info + ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; + // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 + // Normalize the rating so that it ranges from 0 to 1, where + // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. + obsChoiceRating = (obsChoiceRating * n - one) / (n - one); + // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined) + obsChoiceRating *= storm::utility::convertNumber(observationResolution) / storm::utility::convertNumber(maxResolution); + return obsChoiceRating; } - - - template - ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) { - ValueType result = storm::utility::zero(); - for (auto const& entry : belief) { - result += storm::utility::convertNumber(entry.second) * storm::utility::convertNumber(summands.at(entry.first)); - } - return result; + template + std::vector::ValueType> ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector, uint64_t const& maxResolution) { + uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices(); + + std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); + + std::map gatheredSuccessorObservations; // Declare here to avoid reallocations + for (uint64_t mdpChoice = 0; mdpChoice < numMdpChoices; ++mdpChoice) { + gatheredSuccessorObservations.clear(); + overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + for (auto const& obsInfo : gatheredSuccessorObservations) { + auto const& obs = obsInfo.first; + ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution); + + // The rating of the observation will be the minimum over all choice-based observation ratings + resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating); + } + } + return resultingRatings; } - template - std::shared_ptr> - ApproximatePOMDPModelchecker::computeFirstRefinementStep(std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, - bool computeRewards, - boost::optional> overApproximationMap, - boost::optional> underApproximationMap, - uint64_t 
maxUaModelSize) { - bool boundMapsSet = overApproximationMap && underApproximationMap; - std::map overMap; - std::map underMap; - if (boundMapsSet) { - overMap = overApproximationMap.value(); - underMap = underApproximationMap.value(); - } - - storm::storage::BeliefGrid> beliefGrid(pomdp, options.numericPrecision); - bsmap_type beliefStateMap; - - std::deque beliefsToBeExpanded; + template + void ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given"); + STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero(), "Can not refine with negative aggressiveness."); + STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one(), "Refinement with aggressiveness > 1 is invalid."); + + // current maximal resolution (needed for refinement heuristic) + uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); statistics.overApproximationBuildTime.start(); - // Initial belief always has belief ID 0 - auto initialBeliefId = beliefGrid.getInitialBelief(); - auto const& initialBelief = beliefGrid.getGridPoint(initialBeliefId); - auto initialObservation = beliefGrid.getBeliefObservation(initialBelief); - // These are the components to build the MDP from the grid - // Reserve states 0 and 1 as always sink/goal states - storm::storage::SparseMatrixBuilder mdpTransitionsBuilder(0, 0, 0, true, true); - uint64_t extraBottomState = 0; - uint64_t extraTargetState = computeRewards ? 0 : 1; - uint64_t nextMdpStateId = extraTargetState + 1; - uint64_t mdpMatrixRow = 0; - for (uint64_t state = 0; state < nextMdpStateId; ++state) { - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one()); - ++mdpMatrixRow; - } - // Hint vector for the MDP modelchecker (initialize with constant sink/goal values) - std::vector hintVector(nextMdpStateId, storm::utility::zero()); - if (!computeRewards) { - hintVector[extraTargetState] = storm::utility::one(); - } - std::vector targetStates = {extraTargetState}; - storm::storage::BitVector fullyExpandedStates; - - // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace - std::map weightedSumOverMap; - std::map weightedSumUnderMap; - - // for the initial belief, add the triangulated initial states - auto triangulation = beliefGrid.triangulateBelief(initialBelief, observationResolutionVector[initialObservation]); - uint64_t initialMdpState = nextMdpStateId; - ++nextMdpStateId; - if (triangulation.size() == 1) { - // The initial belief is on the grid itself - auto initBeliefId = triangulation.gridPoints.front(); - if (boundMapsSet) { - auto const& gridPoint = beliefGrid.getGridPoint(initBeliefId); - weightedSumOverMap[initBeliefId] = getWeightedSum(gridPoint, overMap); - weightedSumUnderMap[initBeliefId] = getWeightedSum(gridPoint, underMap); + storm::storage::BitVector refinedObservations; + if (!refine) { + // If we build the model from scratch, we first have to setup the explorer for the overApproximation. 
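+                // (The convention, read off the two calls below: for probabilities, startNewExploration(1, 0) creates
+                // the extra target state with value 1 and the extra bottom state with value 0; for expected rewards,
+                // only the extra target state is needed and it carries reward 0, i.e. startNewExploration(0).)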
+ if (computeRewards) { + overApproximation->startNewExploration(storm::utility::zero()); + } else { + overApproximation->startNewExploration(storm::utility::one(), storm::utility::zero()); } - beliefsToBeExpanded.push_back(initBeliefId); - beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState)); - hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one() - : storm::utility::zero()); } else { - // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); - for (uint64_t i = 0; i < triangulation.size(); ++i) { - beliefsToBeExpanded.push_back(triangulation.gridPoints[i]); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]); - beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId)); - ++nextMdpStateId; - if (boundMapsSet) { - auto const& gridPoint = beliefGrid.getGridPoint(triangulation.gridPoints[i]); - weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, overMap); - weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, underMap); - } - hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one() - : storm::utility::zero()); - } - //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting - ++mdpMatrixRow; - } + // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement. + auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution); + ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end()); + // Potentially increase the aggressiveness so that at least one observation actually gets refinement. 
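+                // (Worked example: if the observation ratings are {0.9, 0.4, 0.7} and the current aggressiveness is
+                // 0.25, then minRating = 0.4 raises the aggressiveness to 0.4, so the filter below selects exactly
+                // the observation rated 0.4 for a resolution increment.)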
+ *refinementAggressiveness = std::max(minRating, *refinementAggressiveness); + refinedObservations = storm::utility::vector::filter(obsRatings, [&refinementAggressiveness](ValueType const& r) { return r <= *refinementAggressiveness;}); + STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); + for (auto const& obs : refinedObservations) { + // Heuristically increment the resolution at the refined observations (also based on the refinementAggressiveness) + ValueType incrementValue = storm::utility::one() + (*refinementAggressiveness) * storm::utility::convertNumber(observationResolutionVector[obs]); + observationResolutionVector[obs] += storm::utility::convertNumber(storm::utility::ceil(incrementValue)); + } + overApproximation->restartExploration(); + } + statistics.overApproximationMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); - // Expand the beliefs to generate the grid on-the-fly - if (options.explorationThreshold > storm::utility::zero()) { - STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) - } - storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false); - for (auto const& belId : beliefsToBeExpanded) { - foundBeliefs.set(belId, true); - } - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - - uint64_t currMdpState = beliefStateMap.left.at(currId); - uint32_t currObservation = beliefGrid.getBeliefObservation(currId); - - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); + // Start exploration + std::map gatheredSuccessorObservations; // Declare here to avoid reallocations + while (overApproximation->hasUnexploredState()) { + uint64_t currId = overApproximation->exploreNextState(); + uint32_t currObservation = beliefManager->getBeliefObservation(currId); if (targetObservations.count(currObservation) != 0) { - // Make this state absorbing - targetStates.push_back(currMdpState); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); - ++mdpMatrixRow; - } else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) { - // TODO: with rewards we would have to assign the corresponding reward to this transition - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one() - weightedSumOverMap[currId]); - ++mdpMatrixRow; + overApproximation->setCurrentStateIsTarget(); + overApproximation->addSelfloopTransition(); } else { - fullyExpandedStates.grow(nextMdpStateId, false); - fullyExpandedStates.set(currMdpState, true); - uint64_t someState = beliefGrid.getGridPoint(currId).begin()->first; - uint64_t numChoices = pomdp.getNumberOfChoices(someState); - - for (uint64_t action = 0; action < numChoices; ++action) { - auto successorGridPoints = beliefGrid.expandAndTriangulate(currId, action, observationResolutionVector); - // Check for newly found grid points - foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false); - for (auto const& successor : successorGridPoints) { - auto successorId = successor.first; - auto const& successorBelief = beliefGrid.getGridPoint(successorId); - auto successorObservation = beliefGrid.getBeliefObservation(successorBelief); - if 
(!foundBeliefs.get(successorId)) { - foundBeliefs.set(successorId); - beliefsToBeExpanded.push_back(successorId); - beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId)); - ++nextMdpStateId; - - if (boundMapsSet) { - ValueType upperBound = getWeightedSum(successorBelief, overMap); - ValueType lowerBound = getWeightedSum(successorBelief, underMap); - if (cc.isEqual(upperBound, lowerBound)) { - hintVector.push_back(lowerBound); - } else { - hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one() : storm::utility::zero()); - } - weightedSumOverMap[successorId] = upperBound; - weightedSumUnderMap[successorId] = lowerBound; - } else { - hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one() : storm::utility::zero()); - } - } - auto successorMdpState = beliefStateMap.left.at(successorId); - // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted. - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second); - } - ++mdpMatrixRow; - } - } - if (storm::utility::resources::isTerminate()) { - statistics.overApproximationBuildAborted = true; - break; - } - } - statistics.overApproximationStates = nextMdpStateId; - STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); - if (storm::utility::resources::isTerminate()) { - statistics.overApproximationBuildTime.stop(); - return nullptr; - } - fullyExpandedStates.resize(nextMdpStateId, false); - - storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); - mdpLabeling.addLabel("init"); - mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", initialMdpState); - for (auto targetState : targetStates) { - mdpLabeling.addLabelToState("target", targetState); - } - storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); - auto overApproxMdp = std::make_shared>(std::move(modelComponents)); - if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow)); - for (auto const &iter : beliefStateMap.left) { - if (fullyExpandedStates.get(iter.second)) { - auto currentBelief = beliefGrid.getGridPoint(iter.first); - auto representativeState = currentBelief.begin()->first; - for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { - // Add the reward - uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); - mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); - } - } - } - overApproxMdp->addRewardModel("default", mdpRewardModel); - overApproxMdp->restrictRewardModels(std::set({"default"})); - } - statistics.overApproximationBuildTime.stop(); - STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); - overApproxMdp->printModelInformationToStream(std::cout); - - auto modelPtr = std::static_pointer_cast>(overApproxMdp); - std::string propertyString = computeRewards ? "R" : "P"; - propertyString += min ? "min" : "max"; - propertyString += "=? 
[F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - auto task = storm::api::createTask(property, false); - auto hint = storm::modelchecker::ExplicitModelCheckerHint(); - hint.setResultHint(hintVector); - auto hintPtr = std::make_shared>(hint); - task.setHint(hintPtr); - statistics.overApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(overApproxMdp, task)); - statistics.overApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return nullptr; - } - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true))); - auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; - - STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(beliefGrid, targetObservations, min, computeRewards, maxUaModelSize); - if (storm::utility::resources::isTerminate() && !underApproxComponents) { - // TODO: return other components needed for refinement. - //return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); - return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, initialBeliefId}); - } - - STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); - /* TODO: return other components needed for refinement. - return std::make_unique>( - RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, - underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); - */ - return std::make_unique>(RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, initialBeliefId}); - - } - - template - std::shared_ptr> - ApproximatePOMDPModelchecker::computeRefinementStep(std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, - bool computeRewards, - std::shared_ptr> refinementComponents, - std::set changedObservations, - boost::optional> overApproximationMap, - boost::optional> underApproximationMap, - uint64_t maxUaModelSize) { - bool initialBoundMapsSet = overApproximationMap && underApproximationMap; - std::map initialOverMap; - std::map initialUnderMap; - if (initialBoundMapsSet) { - initialOverMap = overApproximationMap.value(); - initialUnderMap = underApproximationMap.value(); - } - // Note that a persistent cache is not support by the current data structure. 
The resolution for the given belief also has to be stored somewhere to cache effectively - std::map>> subSimplexCache; - std::map> lambdaCache; - - // Map to save the weighted values resulting from the initial preprocessing for newly added beliefs / indices in beliefSpace - std::map weightedSumOverMap; - std::map weightedSumUnderMap; - - statistics.overApproximationBuildTime.start(); - - uint64_t nextBeliefId = refinementComponents->beliefList.size(); - uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates(); - std::set relevantStates; - for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { - auto currentBelief = refinementComponents->beliefList[iter.first]; - if (changedObservations.find(currentBelief.observation) != changedObservations.end()) { - relevantStates.insert(iter.second); - } - } - - std::set> statesAndActionsToCheck; - for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) { - for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) { - for (typename storm::storage::SparseMatrix::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow( - state, row).begin(); - itEntry != refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(state, row).end(); ++itEntry) { - if (relevantStates.find(itEntry->getColumn()) != relevantStates.end()) { - statesAndActionsToCheck.insert(std::make_pair(state, row)); - break; - } - } - } - } - - std::deque beliefsToBeExpanded; - - std::map, std::map> transitionsStateActionPair; - for (auto const &stateActionPair : statesAndActionsToCheck) { - auto currId = refinementComponents->overApproxBeliefStateMap.right.at(stateActionPair.first); - auto action = stateActionPair.second; - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], - action); - std::map transitionInActionBelief; - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, - targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); - nextBeliefId = refinementComponents->beliefList.size(); - //Triangulate here and put the possibly resulting belief in the grid - std::vector> subSimplex; - std::vector lambdas; - //TODO add caching - if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, - observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], - pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - if (options.cacheSubsimplices) { - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; - } + bool stopExploration = false; + if (storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + stopExploration = true; + overApproximation->setCurrentStateIsTruncated(); } - for (size_t j = 0; j < lambdas.size(); ++j) { - if 
(!cc.isEqual(lambdas[j], storm::utility::zero())) { - auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]); - if (approxId == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextBeliefId, observation, subSimplex[j]}; - refinementComponents->beliefList.push_back(gridBelief); - refinementComponents->beliefGrid.push_back(gridBelief); - refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - // compute overapproximate value using MDP result map - if (initialBoundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(initialOverMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(initialUnderMap[i]); - } - weightedSumOverMap[nextBeliefId] = tempWeightedSumOver; - weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder; + for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { + // Check whether we expand this state/action pair + // We always expand if we are not doing refinement of if the state was not available in the "old" MDP. + // Otherwise, a heuristic decides. + bool expandStateAction = true; + if (refine && overApproximation->currentStateHasOldBehavior()) { + // Compute a rating of the current state/action pair + ValueType stateActionRating = storm::utility::one(); + gatheredSuccessorObservations.clear(); + overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations); + for (auto const& obsInfo : gatheredSuccessorObservations) { + if (refinedObservations.get(obsInfo.first)) { + ValueType obsRating = rateObservation(obsInfo.second, observationResolutionVector[obsInfo.first], oldMaxResolution); + stateActionRating = std::min(stateActionRating, obsRating); } - beliefsToBeExpanded.push_back(nextBeliefId); - refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); - transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; - ++nextBeliefId; - ++nextStateId; - } else { - transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; } + // Only refine if this rating is below the doubled refinementAggressiveness + expandStateAction = stateActionRating < storm::utility::convertNumber(2.0) * (*refinementAggressiveness); } - } - } - if (!transitionInActionBelief.empty()) { - transitionsStateActionPair[stateActionPair] = transitionInActionBelief; - } - } - - std::set stoppedExplorationStateSet; - - // Expand newly added beliefs - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - bool isTarget = refinementComponents->beliefIsTarget[currId]; - - if (initialBoundMapsSet && - cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(options.explorationThreshold))) { - STORM_PRINT("Stop Exploration in State " << refinementComponents->overApproxBeliefStateMap.left.at(currId) << " with Value " << weightedSumOverMap[currId] - << std::endl) - 
transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = {{1, weightedSumOverMap[currId]}, - {0, storm::utility::one() - - weightedSumOverMap[currId]}}; - stoppedExplorationStateSet.insert(refinementComponents->overApproxBeliefStateMap.left.at(currId)); - continue; - } - - if (isTarget) { - // Depending on whether we compute rewards, we select the right initial result - // MDP stuff - transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = - {{refinementComponents->overApproxBeliefStateMap.left.at(currId), storm::utility::one()}}; - } else { - uint64_t representativeState = pomdp.getStatesWithObservation(refinementComponents->beliefList[currId].observation).front(); - uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); - std::vector actionRewardsInState(numChoices); - - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], action); - std::map transitionInActionBelief; - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, - targetObservations, refinementComponents->beliefList[currId], action, observation, - nextBeliefId); - nextBeliefId = refinementComponents->beliefList.size(); - //Triangulate here and put the possibly resulting belief in the grid - std::vector> subSimplex; - std::vector lambdas; - - if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, - observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], - pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - if (options.cacheSubsimplices) { - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; + if (expandStateAction) { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); + for (auto const& successor : successorGridPoints) { + bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? 
overApproximation->computeLowerValueBoundAtBelief(successor.first) : overApproximation->computeUpperValueBoundAtBelief(successor.first)); } } - - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]); - if (approxId == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextBeliefId, observation, subSimplex[j]}; - refinementComponents->beliefList.push_back(gridBelief); - refinementComponents->beliefGrid.push_back(gridBelief); - refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - // compute overapproximate value using MDP result map - if (initialBoundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(initialOverMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(initialUnderMap[i]); - } - weightedSumOverMap[nextBeliefId] = tempWeightedSumOver; - weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder; - } - beliefsToBeExpanded.push_back(nextBeliefId); - refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); - transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; - ++nextBeliefId; - ++nextStateId; - } else { - transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; - } + if (stopExploration) { + if (computeRewards) { + overApproximation->addTransitionsToExtraStates(action, truncationProbability); + } else { + overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); } } - } - if (!transitionInActionBelief.empty()) { - transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), action)] = transitionInActionBelief; + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + overApproximation->computeRewardAtCurrentState(action, truncationValueBound); + } + } else { + // Do not refine here + overApproximation->restoreOldBehaviorAtCurrentState(action); } } } @@ -803,351 +422,86 @@ namespace storm { break; } } - - statistics.overApproximationStates = nextStateId; + // TODO: Drop unreachable states (sometimes?) 
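+            // (Numeric sketch of the truncation bookkeeping above: if exploration is stopped at a belief and a
+            // successor with weight 0.7 and value bound b is not added, truncationProbability accumulates 0.7 and
+            // truncationValueBound accumulates 0.7 * b; in the probability case the extra target state then receives
+            // probability 0.7 * b and the extra bottom state 0.7 * (1 - b), so the computed value remains a valid
+            // bound.)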
+ statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildTime.stop(); - // Return the result from the old refinement step - return refinementComponents; - } - storm::models::sparse::StateLabeling mdpLabeling(nextStateId); - mdpLabeling.addLabel("init"); - mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)); - mdpLabeling.addLabelToState("target", 1); - uint_fast64_t currentRow = 0; - uint_fast64_t currentRowGroup = 0; - storm::storage::SparseMatrixBuilder smb(0, nextStateId, 0, false, true); - auto oldTransitionMatrix = refinementComponents->overApproxModelPtr->getTransitionMatrix(); - smb.newRowGroup(currentRow); - smb.addNextValue(currentRow, 0, storm::utility::one()); - ++currentRow; - ++currentRowGroup; - smb.newRowGroup(currentRow); - smb.addNextValue(currentRow, 1, storm::utility::one()); - ++currentRow; - ++currentRowGroup; - for (uint64_t state = 2; state < nextStateId; ++state) { - smb.newRowGroup(currentRow); - //STORM_PRINT("Loop State: " << state << std::endl) - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(refinementComponents->beliefList[refinementComponents->overApproxBeliefStateMap.right.at(state)].observation).front()); - bool isTarget = refinementComponents->beliefIsTarget[refinementComponents->overApproxBeliefStateMap.right.at(state)]; - for (uint64_t action = 0; action < numChoices; ++action) { - if (transitionsStateActionPair.find(std::make_pair(state, action)) == transitionsStateActionPair.end()) { - for (auto const &entry : oldTransitionMatrix.getRow(state, action)) { - smb.addNextValue(currentRow, entry.getColumn(), entry.getValue()); - } - } else { - for (auto const &iter : transitionsStateActionPair[std::make_pair(state, action)]) { - smb.addNextValue(currentRow, iter.first, iter.second); - } - } - ++currentRow; - if (isTarget) { - // If the state is a target, we only have one action, thus we add the target label and stop the iteration - mdpLabeling.addLabelToState("target", state); - break; - } - if (stoppedExplorationStateSet.find(state) != stoppedExplorationStateSet.end()) { - break; - } - } - ++currentRowGroup; + return; } - storm::storage::sparse::ModelComponents modelComponents(smb.build(), mdpLabeling); - storm::models::sparse::Mdp overApproxMdp(modelComponents); - if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { - auto currentBelief = refinementComponents->beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); - } - } - overApproxMdp.addRewardModel("std", mdpRewardModel); - overApproxMdp.restrictRewardModels(std::set({"std"})); - } - overApproxMdp.printModelInformationToStream(std::cout); + + overApproximation->finishExploration(); statistics.overApproximationBuildTime.stop(); - 
STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); - auto model = std::make_shared>(overApproxMdp); - auto modelPtr = std::static_pointer_cast>(model); - std::string propertyString = computeRewards ? "R" : "P"; - propertyString += min ? "min" : "max"; - propertyString += "=? [F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - auto task = storm::api::createTask(property, false); statistics.overApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); + overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.overApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return refinementComponents; // Return the result from the previous iteration - } - STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << std::endl) - STORM_LOG_ASSERT(res, "Result not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); - auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto overApprox = overApproxResultMap[refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)]; - - //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, - refinementComponents->initialBeliefId, min, computeRewards, maxUaModelSize); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - if (storm::utility::resources::isTerminate() && !underApproxComponents) { - return std::make_unique>( - RefinementComponents{modelPtr, overApprox, refinementComponents->underApproxValue, overApproxResultMap, {}, refinementComponents->beliefList, refinementComponents->beliefGrid, refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, {}, refinementComponents->initialBeliefId}); - } - STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); - - return std::make_shared>( - RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, refinementComponents->beliefList, refinementComponents->beliefGrid, - refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, - underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId}); - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(std::set const &targetObservations, bool min) { - std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true); } - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(std::set const &targetObservations, bool min) { - std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); - } - - - 
template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeUnderapproximation(std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - uint64_t initialBeliefId, bool min, - bool computeRewards, uint64_t maxModelSize) { - std::set visitedBelieves; - std::deque beliefsToBeExpanded; - bsmap_type beliefStateMap; - std::vector>> transitions = {{{{0, storm::utility::one()}}}, - {{{1, storm::utility::one()}}}}; - std::vector targetStates = {1}; - - uint64_t stateId = 2; - beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, stateId)); - ++stateId; - uint64_t nextId = beliefList.size(); - uint64_t counter = 0; - + template + void ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { + statistics.underApproximationBuildTime.start(); - // Expand the believes - visitedBelieves.insert(initialBeliefId); - beliefsToBeExpanded.push_back(initialBeliefId); - while (!beliefsToBeExpanded.empty()) { - //TODO think of other ways to stop exploration besides model size - auto currentBeliefId = beliefsToBeExpanded.front(); - uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); - // for targets, we only consider one action with one transition - if (beliefIsTarget[currentBeliefId]) { - // add a self-loop to target states - targetStates.push_back(beliefStateMap.left.at(currentBeliefId)); - transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one()}}}); - } else if (counter > maxModelSize) { - transitions.push_back({{{0, storm::utility::one()}}}); + statistics.underApproximationStateLimit = maxStateCount; + if (!underApproximation->hasComputedValues()) { + // Build a new under approximation + if (computeRewards) { + underApproximation->startNewExploration(storm::utility::zero()); } else { - // Iterate over all actions and add the corresponding transitions - std::vector> actionTransitionStorage; - //TODO add a way to extract the actions from the over-approx and use them here? 
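// --- Illustrative sketch (standalone C++, not part of this patch) ---------
// startNewExploration above presumably reserves the absorbing extra states
// that the deleted code further below creates by hand ("states 0 and 1 as
// always sink/goal states"): when checking probabilities, an extra goal with
// value one and an extra sink with value zero; when checking rewards, a
// single absorbing state collecting reward zero. A guess at the setup based
// on the two call sites; ExtraStates and every name here are hypothetical.
#include <vector>

struct ExtraStates {
    std::vector<double> values; // fixed value of each absorbing extra state
};

ExtraStates startNewExploration(bool computeRewards) {
    if (computeRewards) {
        return {{0.0}};   // one absorbing state, no further reward beyond it
    }
    return {{1.0, 0.0}};  // goal (probability one) and sink (probability zero)
}
// --- End of sketch ---------------------------------------------------------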
- for (uint64_t action = 0; action < numChoices; ++action) { - std::map transitionsInStateWithAction; - std::map observationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currentBeliefId], action); - for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - uint64_t nextBeliefId = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId], - action, - observation, nextId); - nextId = beliefList.size(); - if (visitedBelieves.insert(nextBeliefId).second) { - beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId)); - ++stateId; - beliefsToBeExpanded.push_back(nextBeliefId); - ++counter; - } - transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second; - } - actionTransitionStorage.push_back(transitionsInStateWithAction); - } - transitions.push_back(actionTransitionStorage); - } - beliefsToBeExpanded.pop_front(); - if (storm::utility::resources::isTerminate()) { - statistics.underApproximationBuildAborted = true; - break; - } - } - statistics.underApproximationStates = transitions.size(); - if (storm::utility::resources::isTerminate()) { - statistics.underApproximationBuildTime.stop(); - return nullptr; - } - - storm::models::sparse::StateLabeling labeling(transitions.size()); - labeling.addLabel("init"); - labeling.addLabel("target"); - labeling.addLabelToState("init", 0); - for (auto targetState : targetStates) { - labeling.addLabelToState("target", targetState); - } - - std::shared_ptr> model; - auto transitionMatrix = buildTransitionMatrix(transitions); - if (transitionMatrix.getRowCount() == transitionMatrix.getRowGroupCount()) { - transitionMatrix.makeRowGroupingTrivial(); - } - storm::storage::sparse::ModelComponents modelComponents(transitionMatrix, labeling); - storm::models::sparse::Mdp underApproxMdp(modelComponents); - if (computeRewards) { - storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap.left) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); - } + underApproximation->startNewExploration(storm::utility::one(), storm::utility::zero()); } - underApproxMdp.addRewardModel("std", rewardModel); - underApproxMdp.restrictRewardModels(std::set({"std"})); - } - model = std::make_shared>(underApproxMdp); - - model->printModelInformationToStream(std::cout); - statistics.underApproximationBuildTime.stop(); - - std::string propertyString; - if (computeRewards) { - propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; } else { - propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? 
[F \"target\"]"; - } - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - - statistics.underApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, false))); - statistics.underApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return nullptr; - } - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true))); - auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; - - return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeUnderapproximation(storm::storage::BeliefGrid>& beliefGrid, - std::set const &targetObservations, bool min, - bool computeRewards, uint64_t maxModelSize) { - // Build the belief MDP until enough states are explored. - //TODO think of other ways to stop exploration besides model size - - statistics.underApproximationBuildTime.start(); - - // Reserve states 0 and 1 as always sink/goal states - storm::storage::SparseMatrixBuilder mdpTransitionsBuilder(0, 0, 0, true, true); - uint64_t extraBottomState = 0; - uint64_t extraTargetState = computeRewards ? 0 : 1; - uint64_t nextMdpStateId = extraTargetState + 1; - uint64_t mdpMatrixRow = 0; - for (uint64_t state = 0; state < nextMdpStateId; ++state) { - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one()); - ++mdpMatrixRow; + // Restart the building process + underApproximation->restartExploration(); } - std::vector targetStates = {extraTargetState}; - storm::storage::BitVector fullyExpandedStates; - bsmap_type beliefStateMap; - std::deque beliefsToBeExpanded; - - auto initialBeliefId = beliefGrid.getInitialBelief(); - beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, nextMdpStateId)); - beliefsToBeExpanded.push_back(initialBeliefId); - ++nextMdpStateId; - - // Expand the believes - storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false); - for (auto const& belId : beliefsToBeExpanded) { - foundBeliefs.set(belId, true); - } - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - - uint64_t currMdpState = beliefStateMap.left.at(currId); - auto const& currBelief = beliefGrid.getGridPoint(currId); - uint32_t currObservation = beliefGrid.getBeliefObservation(currBelief); - - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); + // Expand the beliefs + while (underApproximation->hasUnexploredState()) { + uint64_t currId = underApproximation->exploreNextState(); + uint32_t currObservation = beliefManager->getBeliefObservation(currId); if (targetObservations.count(currObservation) != 0) { - // Make this state absorbing - targetStates.push_back(currMdpState); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); - ++mdpMatrixRow; - } else if (currMdpState > maxModelSize) { - if (min) { - // Get an upper bound here - if (computeRewards) { - // TODO: With minimizing rewards we need an upper bound! 
- // In other cases, this could be helpflull as well. - // For now, add a selfloop to "generate" infinite reward - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); - } else { - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, storm::utility::one()); + underApproximation->setCurrentStateIsTarget(); + underApproximation->addSelfloopTransition(); + } else { + bool stopExploration = false; + if (!underApproximation->currentStateHasOldBehavior()) { + if (storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); + } else if (underApproximation->getCurrentNumberOfMdpStates() >= maxStateCount) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); } - } else { - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, computeRewards ? extraTargetState : extraBottomState, storm::utility::one()); } - ++mdpMatrixRow; - } else { - fullyExpandedStates.grow(nextMdpStateId, false); - fullyExpandedStates.set(currMdpState, true); - // Iterate over all actions and add the corresponding transitions - uint64_t someState = currBelief.begin()->first; - uint64_t numChoices = pomdp.getNumberOfChoices(someState); - for (uint64_t action = 0; action < numChoices; ++action) { - auto successorBeliefs = beliefGrid.expand(currId, action); - // Check for newly found beliefs - foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false); - for (auto const& successor : successorBeliefs) { - auto successorId = successor.first; - if (!foundBeliefs.get(successorId)) { - foundBeliefs.set(successorId); - beliefsToBeExpanded.push_back(successorId); - beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId)); - ++nextMdpStateId; + for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { + // Always restore old behavior if available + if (underApproximation->currentStateHasOldBehavior()) { + underApproximation->restoreOldBehaviorAtCurrentState(action); + } else { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successors = beliefManager->expand(currId, action); + for (auto const& successor : successors) { + bool added = underApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first)); + } + } + if (stopExploration) { + if (computeRewards) { + underApproximation->addTransitionsToExtraStates(action, truncationProbability); + } else { + underApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + } + } + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state.
+ underApproximation->computeRewardAtCurrentState(action, truncationValueBound); } - auto successorMdpState = beliefStateMap.left.at(successorId); - // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted. - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second); } - ++mdpMatrixRow; } } if (storm::utility::resources::isTerminate()) { @@ -1155,296 +509,24 @@ namespace storm { break; } } - statistics.underApproximationStates = nextMdpStateId; + statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.underApproximationBuildTime.stop(); - return nullptr; - } - fullyExpandedStates.resize(nextMdpStateId, false); - storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); - mdpLabeling.addLabel("init"); - mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBeliefId)); - for (auto targetState : targetStates) { - mdpLabeling.addLabelToState("target", targetState); + return; } - storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); - auto model = std::make_shared>(std::move(modelComponents)); - if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow)); - for (auto const &iter : beliefStateMap.left) { - if (fullyExpandedStates.get(iter.second)) { - auto currentBelief = beliefGrid.getGridPoint(iter.first); - auto representativeState = currentBelief.begin()->first; - for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { - // Add the reward - uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); - mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); - } - } - } - model->addRewardModel("default", mdpRewardModel); - model->restrictRewardModels(std::set({"default"})); - } - - model->printModelInformationToStream(std::cout); + underApproximation->finishExploration(); statistics.underApproximationBuildTime.stop(); - std::string propertyString; - if (computeRewards) { - propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; - } else { - propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; - } - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - statistics.underApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, false))); + underApproximation->computeValuesOfExploredMdp(min ? 
storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.underApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return nullptr; - } - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true))); - auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; - - return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); - } - - - template - storm::storage::SparseMatrix - ApproximatePOMDPModelchecker::buildTransitionMatrix(std::vector>> &transitions) { - uint_fast64_t currentRow = 0; - uint_fast64_t currentRowGroup = 0; - uint64_t nrColumns = transitions.size(); - uint64_t nrRows = 0; - uint64_t nrEntries = 0; - for (auto const &actionTransitions : transitions) { - for (auto const &map : actionTransitions) { - nrEntries += map.size(); - ++nrRows; - } - } - storm::storage::SparseMatrixBuilder smb(nrRows, nrColumns, nrEntries, true, true); - for (auto const &actionTransitions : transitions) { - smb.newRowGroup(currentRow); - for (auto const &map : actionTransitions) { - for (auto const &transition : map) { - smb.addNextValue(currentRow, transition.first, transition.second); - } - ++currentRow; - } - ++currentRowGroup; - } - return smb.build(); - } - - template - uint64_t ApproximatePOMDPModelchecker::getBeliefIdInVector( - std::vector> const &grid, uint32_t observation, - std::map &probabilities) { - // TODO This one is quite slow - for (auto const &belief : grid) { - if (belief.observation == observation) { - bool same = true; - for (auto const &probEntry : belief.probabilities) { - if (probabilities.find(probEntry.first) == probabilities.end()) { - same = false; - break; - } - if (!cc.isEqual(probEntry.second, probabilities[probEntry.first])) { - same = false; - break; - } - } - if (same) { - return belief.id; - } - } - } - return -1; - } - - template - storm::pomdp::Belief ApproximatePOMDPModelchecker::getInitialBelief(uint64_t id) { - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, - "POMDP contains more than one initial state"); - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, - "POMDP does not contain an initial state"); - std::map distribution; - uint32_t observation = 0; - for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { - if (pomdp.getInitialStates()[state] == 1) { - distribution[state] = storm::utility::one(); - observation = pomdp.getObservation(state); - break; - } - } - return storm::pomdp::Belief{id, observation, distribution}; - } - - template - std::pair>, std::vector> - ApproximatePOMDPModelchecker::computeSubSimplexAndLambdas( - std::map &probabilities, uint64_t resolution, uint64_t nrStates) { - - //TODO this can also be simplified using the sparse vector interpretation - - // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) - // Variable names are based on the paper - std::vector x(nrStates); - std::vector v(nrStates); - std::vector d(nrStates); - auto convResolution = storm::utility::convertNumber(resolution); - - for (size_t i = 0; i < nrStates; ++i) { - for (auto const &probEntry : probabilities) { - if (probEntry.first >= i) { - x[i] += convResolution * probEntry.second; - } - } - v[i] = storm::utility::floor(x[i]); - d[i] 
= x[i] - v[i]; - } - - auto p = storm::utility::vector::getSortedIndices(d); - - std::vector> qs(nrStates, std::vector(nrStates)); - for (size_t i = 0; i < nrStates; ++i) { - if (i == 0) { - for (size_t j = 0; j < nrStates; ++j) { - qs[i][j] = v[j]; - } - } else { - for (size_t j = 0; j < nrStates; ++j) { - if (j == p[i - 1]) { - qs[i][j] = qs[i - 1][j] + storm::utility::one(); - } else { - qs[i][j] = qs[i - 1][j]; - } - } - } - } - std::vector> subSimplex(nrStates); - for (size_t j = 0; j < nrStates; ++j) { - for (size_t i = 0; i < nrStates - 1; ++i) { - if (cc.isLess(storm::utility::zero(), qs[j][i] - qs[j][i + 1])) { - subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution; - } - } - - if (cc.isLess(storm::utility::zero(), qs[j][nrStates - 1])) { - subSimplex[j][nrStates - 1] = qs[j][nrStates - 1] / convResolution; - } - } - - std::vector lambdas(nrStates, storm::utility::zero()); - auto sum = storm::utility::zero(); - for (size_t i = 1; i < nrStates; ++i) { - lambdas[i] = d[p[i - 1]] - d[p[i]]; - sum += d[p[i - 1]] - d[p[i]]; - } - lambdas[0] = storm::utility::one() - sum; - - return std::make_pair(subSimplex, lambdas); - } - - template - std::map - ApproximatePOMDPModelchecker::computeObservationProbabilitiesAfterAction( - storm::pomdp::Belief &belief, - uint64_t actionIndex) { - std::map res; - // the id is not important here as we immediately discard the belief (very hacky, I don't like it either) - std::map postProbabilities; - for (auto const &probEntry : belief.probabilities) { - uint64_t state = probEntry.first; - auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); - for (auto const &entry : row) { - if (entry.getValue() > 0) { - postProbabilities[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); - } - } - } - for (auto const &probEntry : postProbabilities) { - uint32_t observation = pomdp.getObservation(probEntry.first); - if (res.count(observation) == 0) { - res[observation] = probEntry.second; - } else { - res[observation] += probEntry.second; - } - } - - return res; } - template - uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation(std::vector> &beliefList, - std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief &belief, uint64_t actionIndex, - uint32_t observation, uint64_t id) { - std::map distributionAfter; - for (auto const &probEntry : belief.probabilities) { - uint64_t state = probEntry.first; - auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); - for (auto const &entry : row) { - if (pomdp.getObservation(entry.getColumn()) == observation) { - distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); - } - } - } - // We have to normalize the distribution - auto sum = storm::utility::zero(); - for (auto const &entry : distributionAfter) { - sum += entry.second; - } - - for (auto const &entry : distributionAfter) { - distributionAfter[entry.first] /= sum; - } - if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) { - auto res = getBeliefIdInVector(beliefList, observation, distributionAfter); - return res; - } else { - beliefList.push_back(storm::pomdp::Belief{id, observation, distributionAfter}); - beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - return id; - } - } - - template - ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t 
action, std::map const& belief) { - auto result = storm::utility::zero(); - for (auto const &probEntry : belief) { - result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); - } - return result; - } - - template - ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, storm::pomdp::Belief const& belief) { - auto result = storm::utility::zero(); - for (auto const &probEntry : belief.probabilities) { - result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); - } - return result; - } - - - template - class ApproximatePOMDPModelchecker; - -#ifdef STORM_HAVE_CARL - - template - class ApproximatePOMDPModelchecker; + template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>; + template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<storm::RationalNumber>>; -#endif } } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 32daa0876..823eebf60 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -3,7 +3,8 @@ #include "storm/models/sparse/Pomdp.h" #include "storm/utility/logging.h" #include "storm-pomdp/storage/Belief.h" -#include "storm-pomdp/storage/BeliefGrid.h" +#include "storm-pomdp/storage/BeliefManager.h" +#include "storm-pomdp/builder/BeliefMdpExplorer.h" #include #include "storm/storage/jani/Property.h" @@ -17,41 +18,13 @@ namespace storm { namespace modelchecker { typedef boost::bimap bsmap_type; - template - struct POMDPCheckResult { - ValueType overApproxValue; - ValueType underApproxValue; - }; - - /** - * Struct containing information which is supposed to be persistent over multiple refinement steps - * - */ - template> - struct RefinementComponents { - std::shared_ptr> overApproxModelPtr; - ValueType overApproxValue; - ValueType underApproxValue; - std::map overApproxMap; - std::map underApproxMap; - std::vector> beliefList; - std::vector> beliefGrid; - std::vector beliefIsTarget; - bsmap_type overApproxBeliefStateMap; - bsmap_type underApproxBeliefStateMap; - uint64_t initialBeliefId; - }; - - template> - struct UnderApproxComponents { - ValueType underApproxValue; - std::map underApproxMap; - bsmap_type underApproxBeliefStateMap; - }; - - template> + template class ApproximatePOMDPModelchecker { public: + typedef typename PomdpModelType::ValueType ValueType; + typedef typename PomdpModelType::RewardModelType RewardModelType; + typedef storm::storage::BeliefManager BeliefManagerType; + typedef storm::builder::BeliefMdpExplorer ExplorerType; struct Options { Options(); @@ -61,72 +34,23 @@ namespace storm { ValueType refinementPrecision; /// Used to decide when the refinement should terminate ValueType numericPrecision; /// Used to decide whether two values are equal bool cacheSubsimplices; /// Enables caching of subsimplices + boost::optional beliefMdpSizeThreshold; /// Sets the (initial) size of the unfolded belief MDP. 0 means auto selection.
+ }; + + struct Result { + Result(ValueType lower, ValueType upper); + ValueType lowerBound; + ValueType upperBound; + ValueType diff (bool relative = false) const; }; - ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options = Options()); + ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options = Options()); - std::unique_ptr> check(storm::logic::Formula const& formula); + Result check(storm::logic::Formula const& formula); void printStatisticsToStream(std::ostream& stream) const; private: - /** - * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop - * - * @param targetObservations the set of observations to be reached - * @param min true if minimum probability is to be computed - * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values - */ - std::unique_ptr> - refineReachability(std::set const &targetObservations, bool min, bool computeRewards); - - /** - * Compute the reachability probability of given target observations on a POMDP for the given resolution only. - * On-the-fly state space generation is used for the overapproximation - * - * @param targetObservations the set of observations to be reached - * @param min true if minimum probability is to be computed - * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values - */ - std::unique_ptr> - computeReachabilityProbabilityOTF(std::set const &targetObservations, bool min); - - /** - * Compute the reachability rewards for given target observations on a POMDP for the given resolution only. - * On-the-fly state space generation is used for the overapproximation - * - * @param targetObservations the set of observations to be reached - * @param min true if minimum rewards are to be computed - * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values - */ - std::unique_ptr> - computeReachabilityRewardOTF(std::set const &targetObservations, bool min); - - private: - /** - * Helper method to compute the inital step of the refinement loop - * - * @param targetObservations set of target observations - * @param min true if minimum value is to be computed - * @param observationResolutionVector vector containing the resolution to be used for each observation - * @param computeRewards true if rewards are to be computed, false if probability is computed - * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value - * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value - * @param maxUaModelSize the maximum size of the underapproximation model to be generated - * @return struct containing components generated during the computation to be used in later refinement iterations - */ - std::shared_ptr> - computeFirstRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, boost::optional> overApproximationMap = boost::none, - boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); - - std::shared_ptr> - computeRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, std::shared_ptr> refinementComponents, - std::set changedObservations, - boost::optional> overApproximationMap = boost::none, 
- boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); - /** * Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size * @@ -139,114 +63,32 @@ namespace storm { * @param maxUaModelSize the maximum size of the underapproximation model to be generated * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ - std::unique_ptr> - computeReachabilityOTF(std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, bool computeRewards, - boost::optional> overApproximationMap = boost::none, - boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); - - /** - * Helper to compute an underapproximation of the reachability property. - * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. - * - * @param beliefList vector containing already generated beliefs - * @param beliefIsTarget vector containinf for each belief in beliefList true if the belief is a target - * @param targetObservations set of target observations - * @param initialBeliefId Id of the belief corresponding to the POMDP's initial state - * @param min true if minimum value is to be computed - * @param computeReward true if rewards are to be computed - * @param maxModelSize number of states up until which the belief support should be unrolled - * @return struct containing the components generated during the under approximation - */ - std::unique_ptr> computeUnderapproximation(std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - uint64_t initialBeliefId, bool min, bool computeReward, - uint64_t maxModelSize); - std::unique_ptr> computeUnderapproximation(storm::storage::BeliefGrid>& beliefGrid, - std::set const &targetObservations, bool min, bool computeReward, - uint64_t maxModelSize); - - /** - * Constructs the initial belief for the given POMDP - * - * @param pomdp the POMDP - * @param id the id the initial belief is given - * @return a belief representing the initial belief - */ - storm::pomdp::Belief - getInitialBelief(uint64_t id); - - - /** - * Subroutine to compute the subsimplex a given belief is contained in and the corresponding lambda values necessary for the Freudenthal triangulation - * - * @param probabilities the probability distribution of the belief - * @param gridResolution the resolution used for the belief - * @param nrStates number of states in the POMDP - * @return a pair containing: 1) the subsimplices 2) the lambda values - */ - std::pair>, std::vector> - computeSubSimplexAndLambdas(std::map &probabilities, uint64_t gridResolution, uint64_t nrStates); - - - /** - * Helper method to get the probabilities to be in a state with each observation after performing an action - * - * @param belief the belief in which the action is performed - * @param actionIndex the index of the action to be performed - * @return mapping from each observation to the probability to be in a state with that observation after performing the action - */ - std::map computeObservationProbabilitiesAfterAction(storm::pomdp::Belief &belief, - uint64_t actionIndex); - - /** - * Helper method to get the id of the next belief that results from a belief by performing an action and observing an observation. 
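// --- Illustrative sketch (standalone C++, not part of this patch) ---------
// The belief update performed by the (removed) getBeliefAfterActionAndObservation
// helper documented around this point, as a self-contained function: the
// successor belief is proportional to sum_s b(s) * T(s, a, s'), restricted to
// successors s' that emit the given observation, and is then normalized. The
// POMDP is passed in as plain containers here instead of Storm's matrix API;
// all names are illustrative.
#include <cstdint>
#include <map>
#include <vector>

using Belief = std::map<std::uint64_t, double>; // state -> probability

// transitionsUnderAction[s]: successor distribution of state s for the fixed action.
// observationOf[s']: observation emitted in state s'.
// Precondition: the observation occurs with positive probability after the action.
Belief beliefAfterActionAndObservation(Belief const& belief,
                                       std::vector<Belief> const& transitionsUnderAction,
                                       std::vector<std::uint32_t> const& observationOf,
                                       std::uint32_t observation) {
    Belief result;
    double mass = 0.0;
    for (auto const& [state, probability] : belief) {
        for (auto const& [successor, transitionProbability] : transitionsUnderAction[state]) {
            if (observationOf[successor] == observation) {
                result[successor] += probability * transitionProbability;
                mass += probability * transitionProbability;
            }
        }
    }
    for (auto& [successor, probability] : result) {
        probability /= mass; // normalize by the probability of the observation
    }
    return result;
}
// --- End of sketch ---------------------------------------------------------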
- * If the belief does not exist yet, it is created and added to the list of all beliefs - * - * @param beliefList data structure to store all generated beliefs - * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief - * @param targetObservations set of target observations - * @param belief the starting belief - * @param actionIndex the index of the action to be performed - * @param observation the observation after the action was performed - * @return the resulting belief (observation and distribution) - */ - uint64_t getBeliefAfterActionAndObservation(std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - storm::pomdp::Belief &belief, - uint64_t actionIndex, uint32_t observation, uint64_t id); - + void computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result); + + /** - * Helper to get the id of a Belief stored in a given vector structure + * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop * - * @param grid the vector on which the lookup is performed - * @param observation the observation of the belief - * @param probabilities the probability distribution over the POMDP states of the Belief - * @return if the belief was found in the vector, the belief's ID, otherwise -1 + * @param targetObservations the set of observations to be reached + * @param min true if minimum probability is to be computed + * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ - uint64_t getBeliefIdInVector(std::vector> const &grid, uint32_t observation, - std::map &probabilities); + void refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result); /** - * Helper method to build the transition matrix from a data structure containing transations - * - * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability) - * @return sparseMatrix representing the transitions + * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties */ - storm::storage::SparseMatrix buildTransitionMatrix(std::vector>> &transitions); + void buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); /** - * Get the reward for performing an action in a given belief - * - * @param action the index of the action to be performed - * @param belief the belief in which the action is performed - * @return the reward earned by performing the action in the belief + * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. 
provides a lower bound for maximizing and an upper bound for minimizing properties */ - ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief const& belief); - ValueType getRewardAfterAction(uint64_t action, std::map const& belief); + void buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); + ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution); + + std::vector getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector, uint64_t const& maxResolution); + struct Statistics { Statistics(); boost::optional refinementSteps; @@ -255,17 +97,19 @@ namespace storm { bool overApproximationBuildAborted; storm::utility::Stopwatch overApproximationBuildTime; storm::utility::Stopwatch overApproximationCheckTime; + boost::optional overApproximationMaxResolution; boost::optional underApproximationStates; bool underApproximationBuildAborted; storm::utility::Stopwatch underApproximationBuildTime; storm::utility::Stopwatch underApproximationCheckTime; + boost::optional underApproximationStateLimit; bool aborted; }; Statistics statistics; - storm::models::sparse::Pomdp const& pomdp; + PomdpModelType const& pomdp; Options options; storm::utility::ConstantsComparator cc; }; diff --git a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h new file mode 100644 index 000000000..ca4c2192f --- /dev/null +++ b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h @@ -0,0 +1,117 @@ +#pragma once + +#include "storm-pomdp/analysis/FormulaInformation.h" + +#include "storm/api/verification.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/models/sparse/StandardRewardModel.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/storage/Scheduler.h" + +#include "storm/utility/macros.h" +#include "storm/exceptions/UnexpectedException.h" +#include "storm/exceptions/NotSupportedException.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + template + class TrivialPomdpValueBoundsModelChecker { + public: + typedef typename PomdpType::ValueType ValueType; + TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) { + // Intentionally left empty + } + + struct ValueBounds { + std::vector lower; + std::vector upper; + }; + ValueBounds getValueBounds(storm::logic::Formula const& formula) { + return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula)); + } + + ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) { + STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis."); + // Compute the values on the fully observable MDP + // We need an actual MDP here so that the apply scheduler method below will work. + // Also, the api call in the next line will require a copy anyway. 
+ auto underlyingMdp = std::make_shared>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels()); + auto resultPtr = storm::api::verifyWithSparseEngine(underlyingMdp, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + std::vector fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult().getValueVector()); + + // Create some positional scheduler for the POMDP + storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); + // For each state, we heuristically find a good distribution over output actions. + std::vector fullyObservableChoiceValues(pomdp.getNumberOfChoices()); + if (info.isNonNestedExpectedRewardFormula()) { + std::vector actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix()); + pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues, &actionBasedRewards); + } else { + pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues); + } + auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); + for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { + auto obsStates = pomdp.getStatesWithObservation(obs); + storm::storage::Distribution choiceDistribution; + for (auto const &state : obsStates) { + ValueType const& stateValue = fullyObservableResult[state]; + assert(stateValue >= storm::utility::zero()); + for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) { + ValueType const& choiceValue = fullyObservableChoiceValues[choice]; + assert(choiceValue >= storm::utility::zero()); + // Rate this choice by considering the relative difference between the choice value and the (optimal) state value + ValueType choiceRating; + if (stateValue < choiceValue) { + choiceRating = choiceValue - stateValue; + if (!storm::utility::isZero(choiceValue)) { + choiceRating /= choiceValue; + } + } else { + choiceRating = stateValue - choiceValue; + if (!storm::utility::isZero(stateValue)) { + choiceRating /= stateValue; + } + } + assert(choiceRating <= storm::utility::one()); + assert(choiceRating >= storm::utility::zero()); + // choiceRating = 0 is a very good choice, choiceRating = 1 is a very bad choice + if (choiceRating <= storm::utility::convertNumber(0.5)) { + choiceDistribution.addProbability(choice - choiceIndices[state], storm::utility::one() - choiceRating); + } + } + } + choiceDistribution.normalize(); + for (auto const& state : obsStates) { + pomdpScheduler.setChoice(choiceDistribution, state); + } + } + STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler."); + auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false); + + auto resultPtr2 = storm::api::verifyWithSparseEngine(scheduledModel, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr2, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr2->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + std::vector pomdpSchedulerResult = std::move(resultPtr2->template asExplicitQuantitativeCheckResult().getValueVector()); + + // Finally prepare 
the result + ValueBounds result; + if (info.minimize()) { + result.lower = std::move(fullyObservableResult); + result.upper = std::move(pomdpSchedulerResult); + } else { + result.lower = std::move(pomdpSchedulerResult); + result.upper = std::move(fullyObservableResult); + } + STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower, result.upper, std::less_equal()), "Lower bound is larger than upper bound"); + return result; + } + + private: + PomdpType const& pomdp; + }; + } + } +} \ No newline at end of file diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefGrid.h deleted file mode 100644 index b0661fcc5..000000000 --- a/src/storm-pomdp/storage/BeliefGrid.h +++ /dev/null @@ -1,354 +0,0 @@ -#pragma once - -#include -#include -//#include - -#include "storm/utility/macros.h" -#include "storm/exceptions/UnexpectedException.h" - -namespace storm { - namespace storage { - - template - // TODO: Change name. This actually does not only manage grid points. - class BeliefGrid { - public: - - typedef typename PomdpType::ValueType ValueType; - //typedef boost::container::flat_map BeliefType - typedef std::map BeliefType; - typedef uint64_t BeliefId; - - BeliefGrid(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { - // Intentionally left empty - } - - struct Triangulation { - std::vector gridPoints; - std::vector weights; - uint64_t size() const { - return weights.size(); - } - }; - - BeliefType const& getGridPoint(BeliefId const& id) const { - return gridPoints[id]; - } - - BeliefId getIdOfGridPoint(BeliefType const& gridPoint) const { - auto idIt = gridPointToIdMap.find(gridPoint); - STORM_LOG_THROW(idIt != gridPointToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown grid state."); - return idIt->second; - } - - std::string toString(BeliefType const& belief) const { - std::stringstream str; - str << "{ "; - bool first = true; - for (auto const& entry : belief) { - if (first) { - first = false; - } else { - str << ", "; - } - str << entry.first << ": " << entry.second; - } - str << " }"; - return str.str(); - } - - std::string toString(Triangulation const& t) const { - std::stringstream str; - str << "(\n"; - for (uint64_t i = 0; i < t.size(); ++i) { - str << "\t" << t.weights[i] << " * \t" << toString(getGridPoint(t.gridPoints[i])) << "\n"; - } - str <<")\n"; - return str.str(); - } - - bool isEqual(BeliefType const& first, BeliefType const& second) const { - if (first.size() != second.size()) { - return false; - } - auto secondIt = second.begin(); - for (auto const& firstEntry : first) { - if (firstEntry.first != secondIt->first) { - return false; - } - if (!cc.isEqual(firstEntry.second, secondIt->second)) { - return false; - } - ++secondIt; - } - return true; - } - - bool assertBelief(BeliefType const& belief) const { - BeliefValueType sum = storm::utility::zero(); - boost::optional observation; - for (auto const& entry : belief) { - uintmax_t entryObservation = pomdp.getObservation(entry.first); - if (observation) { - if (observation.get() != entryObservation) { - STORM_LOG_ERROR("Beliefsupport contains different observations."); - return false; - } - } else { - observation = entryObservation; - } - if (cc.isZero(entry.second)) { - // We assume that beliefs only consider their support. 
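// --- Illustrative sketch (standalone C++, not part of this patch) ---------
// The choice-rating heuristic of TrivialPomdpValueBoundsModelChecker above,
// reduced to a single state for clarity (the patch aggregates the choices of
// all states sharing an observation before normalizing): a choice is rated by
// the relative gap to the optimal state value (0 = optimal, 1 = useless), and
// choices rated at most 0.5 enter the distribution with weight 1 - rating.
// Plain doubles and hypothetical names; assumes some choice attains the state
// value, so the distribution is never empty.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <map>
#include <vector>

std::map<std::size_t, double> rateChoices(double stateValue,
                                          std::vector<double> const& choiceValues) {
    std::map<std::size_t, double> distribution;
    double total = 0.0;
    for (std::size_t choice = 0; choice < choiceValues.size(); ++choice) {
        double larger = std::max(stateValue, choiceValues[choice]);
        double rating = larger == 0.0 ? 0.0
                                      : std::abs(stateValue - choiceValues[choice]) / larger;
        if (rating <= 0.5) {
            distribution[choice] = 1.0 - rating;
            total += 1.0 - rating;
        }
    }
    for (auto& entry : distribution) {
        entry.second /= total; // normalize to a probability distribution
    }
    return distribution;
}
// --- End of sketch ---------------------------------------------------------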
- STORM_LOG_ERROR("Zero belief probability."); - return false; - } - if (cc.isLess(entry.second, storm::utility::zero())) { - STORM_LOG_ERROR("Negative belief probability."); - return false; - } - if (cc.isLess(storm::utility::one(), entry.second)) { - STORM_LOG_ERROR("Belief probability greater than one."); - return false; - } - sum += entry.second; - } - if (!cc.isOne(sum)) { - STORM_LOG_ERROR("Belief does not sum up to one."); - return false; - } - return true; - } - - bool assertTriangulation(BeliefType const& belief, Triangulation const& triangulation) const { - if (triangulation.weights.size() != triangulation.gridPoints.size()) { - STORM_LOG_ERROR("Number of weights and points in triangulation does not match."); - return false; - } - if (triangulation.size() == 0) { - STORM_LOG_ERROR("Empty triangulation."); - return false; - } - BeliefType triangulatedBelief; - BeliefValueType weightSum = storm::utility::zero(); - for (uint64_t i = 0; i < triangulation.weights.size(); ++i) { - if (cc.isZero(triangulation.weights[i])) { - STORM_LOG_ERROR("Zero weight in triangulation."); - return false; - } - if (cc.isLess(triangulation.weights[i], storm::utility::zero())) { - STORM_LOG_ERROR("Negative weight in triangulation."); - return false; - } - if (cc.isLess(storm::utility::one(), triangulation.weights[i])) { - STORM_LOG_ERROR("Weight greater than one in triangulation."); - } - weightSum += triangulation.weights[i]; - BeliefType const& gridPoint = getGridPoint(triangulation.gridPoints[i]); - for (auto const& pointEntry : gridPoint) { - BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero()).first->second; - triangulatedValue += triangulation.weights[i] * pointEntry.second; - } - } - if (!cc.isOne(weightSum)) { - STORM_LOG_ERROR("Triangulation weights do not sum up to one."); - return false; - } - if (!assertBelief(triangulatedBelief)) { - STORM_LOG_ERROR("Triangulated belief is not a belief."); - } - if (!isEqual(belief, triangulatedBelief)) { - STORM_LOG_ERROR("Belief:\n\t" << toString(belief) << "\ndoes not match triangulated belief:\n\t" << toString(triangulatedBelief) << "."); - return false; - } - return true; - } - - BeliefId getInitialBelief() { - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, - "POMDP contains more than one initial state"); - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, - "POMDP does not contain an initial state"); - BeliefType belief; - belief[*pomdp.getInitialStates().begin()] = storm::utility::one(); - - STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief."); - return getOrAddGridPointId(belief); - } - - uint32_t getBeliefObservation(BeliefType belief) { - STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief."); - return pomdp.getObservation(belief.begin()->first); - } - - uint32_t getBeliefObservation(BeliefId beliefId) { - return getBeliefObservation(getGridPoint(beliefId)); - } - - - Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { - //TODO this can also be simplified using the sparse vector interpretation - //TODO Enable chaching for this method? 
- STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); - - auto nrStates = pomdp.getNumberOfStates(); - - // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) - // Variable names are based on the paper - // TODO avoid reallocations for these vectors - std::vector x(nrStates); - std::vector v(nrStates); - std::vector d(nrStates); - auto convResolution = storm::utility::convertNumber(resolution); - - for (size_t i = 0; i < nrStates; ++i) { - for (auto const &probEntry : belief) { - if (probEntry.first >= i) { - x[i] += convResolution * probEntry.second; - } - } - v[i] = storm::utility::floor(x[i]); - d[i] = x[i] - v[i]; - } - - auto p = storm::utility::vector::getSortedIndices(d); - - std::vector> qs(nrStates, std::vector(nrStates)); - for (size_t i = 0; i < nrStates; ++i) { - if (i == 0) { - for (size_t j = 0; j < nrStates; ++j) { - qs[i][j] = v[j]; - } - } else { - for (size_t j = 0; j < nrStates; ++j) { - if (j == p[i - 1]) { - qs[i][j] = qs[i - 1][j] + storm::utility::one(); - } else { - qs[i][j] = qs[i - 1][j]; - } - } - } - } - - Triangulation result; - // The first weight is 1-sum(other weights). We therefore process the js in reverse order - BeliefValueType firstWeight = storm::utility::one(); - for (size_t j = nrStates; j > 0;) { - --j; - // First create the weights. The weights vector will be reversed at the end. - ValueType weight; - if (j == 0) { - weight = firstWeight; - } else { - weight = d[p[j - 1]] - d[p[j]]; - firstWeight -= weight; - } - if (!cc.isZero(weight)) { - result.weights.push_back(weight); - BeliefType gridPoint; - auto const& qsj = qs[j]; - for (size_t i = 0; i < nrStates - 1; ++i) { - BeliefValueType gridPointEntry = qsj[i] - qsj[i + 1]; - if (!cc.isZero(gridPointEntry)) { - gridPoint[i] = gridPointEntry / convResolution; - } - } - if (!cc.isZero(qsj[nrStates - 1])) { - gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution; - } - result.gridPoints.push_back(getOrAddGridPointId(gridPoint)); - } - } - - STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation: " << toString(result)); - return result; - } - - template - void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { - auto insertionRes = distr.emplace(state, value); - if (!insertionRes.second) { - insertionRes.first->second += value; - } - } - - BeliefId getNumberOfGridPointIds() const { - return gridPoints.size(); - } - - std::map expandInternal(BeliefId const& gridPointId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { - std::map destinations; // The belief ids should be ordered - // TODO: Does this make sense? 
-
-            BeliefType gridPoint = getGridPoint(gridPointId);
-
-            // Find the probability we go to each observation
-            BeliefType successorObs; // This is actually not a belief but has the same type
-            for (auto const& pointEntry : gridPoint) {
-                uint64_t state = pointEntry.first;
-                for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
-                    if (!storm::utility::isZero(pomdpTransition.getValue())) {
-                        auto obs = pomdp.getObservation(pomdpTransition.getColumn());
-                        addToDistribution(successorObs, obs, pointEntry.second * pomdpTransition.getValue());
-                    }
-                }
-            }
-
-            // Now for each successor observation we find and potentially triangulate the successor belief
-            for (auto const& successor : successorObs) {
-                BeliefType successorBelief;
-                for (auto const& pointEntry : gridPoint) {
-                    uint64_t state = pointEntry.first;
-                    for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
-                        if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) {
-                            ValueType prob = pointEntry.second * pomdpTransition.getValue() / successor.second;
-                            addToDistribution(successorBelief, pomdpTransition.getColumn(), prob);
-                        }
-                    }
-                }
-                STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief.");
-
-                if (observationTriangulationResolutions) {
-                    Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.get()[successor.first]);
-                    for (size_t j = 0; j < triangulation.size(); ++j) {
-                        addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
-                    }
-                } else {
-                    addToDistribution(destinations, getOrAddGridPointId(successorBelief), successor.second);
-                }
-            }
-
-            return destinations;
-
-        }
-
-        std::map<BeliefId, ValueType> expandAndTriangulate(BeliefId const& gridPointId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
-            return expandInternal(gridPointId, actionIndex, observationResolutions);
-        }
-
-        std::map<BeliefId, ValueType> expand(BeliefId const& gridPointId, uint64_t actionIndex) {
-            return expandInternal(gridPointId, actionIndex);
-        }
-
-    private:
-
-        BeliefId getOrAddGridPointId(BeliefType const& gridPoint) {
-            auto insertioRes = gridPointToIdMap.emplace(gridPoint, gridPoints.size());
-            if (insertioRes.second) {
-                // There actually was an insertion, so add the new grid state
-                gridPoints.push_back(gridPoint);
-            }
-            // Return the id
-            return insertioRes.first->second;
-        }
-
-        PomdpType const& pomdp;
-
-        std::vector<BeliefType> gridPoints;
-        std::map<BeliefType, BeliefId> gridPointToIdMap;
-        storm::utility::ConstantsComparator<ValueType> cc;
-
-
-    };
-    }
-}
\ No newline at end of file
diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h
new file mode 100644
index 000000000..25ec3a3d2
--- /dev/null
+++ b/src/storm-pomdp/storage/BeliefManager.h
@@ -0,0 +1,436 @@
+#pragma once
+
+#include <unordered_map>
+#include <boost/container/flat_map.hpp>
+#include <boost/optional.hpp>
+#include "storm/adapters/RationalNumberAdapter.h"
+#include "storm/utility/macros.h"
+#include "storm/exceptions/UnexpectedException.h"
+
+namespace storm {
+    namespace storage {
+
+        template<typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType, typename StateType = uint64_t>
+        class BeliefManager {
+        public:
+
+            typedef typename PomdpType::ValueType ValueType;
+            typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType; // iterating over this shall be ordered (for correct hash computation)
+            typedef uint64_t BeliefId;
+
+            BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
+                initialBeliefId = computeInitialBelief();
+            }
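+            // Note (editor's summary, not part of the original patch): a belief b is a
+            // probability distribution over the POMDP states,
+            //     b : S -> [0,1]  with  sum_{s in S} b(s) = 1,
+            // whose support only contains states sharing one common observation, i.e.
+            //     O(s) = O(s') for all s, s' in supp(b).
+            // This is exactly what assertBelief below checks, and it is why BeliefType maps
+            // states to probability values.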
+
+            void setRewardModel(boost::optional<std::string> rewardModelName = boost::none) {
+                if (rewardModelName) {
+                    auto const& rewardModel = pomdp.getRewardModel(rewardModelName.get());
+                    pomdpActionRewardVector = rewardModel.getTotalRewardVector(pomdp.getTransitionMatrix());
+                } else {
+                    setRewardModel(pomdp.getUniqueRewardModelName());
+                }
+            }
+
+            void unsetRewardModel() {
+                pomdpActionRewardVector.clear();
+            }
+
+            struct Triangulation {
+                std::vector<BeliefId> gridPoints;
+                std::vector<BeliefValueType> weights;
+                uint64_t size() const {
+                    return weights.size();
+                }
+            };
+
+            BeliefId noId() const {
+                return std::numeric_limits<BeliefId>::max();
+            }
+
+            bool isEqual(BeliefId const& first, BeliefId const& second) const {
+                return isEqual(getBelief(first), getBelief(second));
+            }
+
+            std::string toString(BeliefId const& beliefId) const {
+                return toString(getBelief(beliefId));
+            }
+
+
+            std::string toString(Triangulation const& t) const {
+                std::stringstream str;
+                str << "(\n";
+                for (uint64_t i = 0; i < t.size(); ++i) {
+                    str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n";
+                }
+                str << ")\n";
+                return str.str();
+            }
+
+            template<typename SummandsType>
+            ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) {
+                ValueType result = storm::utility::zero<ValueType>();
+                for (auto const& entry : getBelief(beliefId)) {
+                    result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
+                }
+                return result;
+            }
+
+
+            BeliefId const& getInitialBelief() const {
+                return initialBeliefId;
+            }
+
+            ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const {
+                auto const& belief = getBelief(beliefId);
+                STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified.");
+                auto result = storm::utility::zero<ValueType>();
+                auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
+                for (auto const &entry : belief) {
+                    uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex;
+                    STORM_LOG_ASSERT(choiceIndex < choiceIndices[entry.first + 1], "Invalid local action index.");
+                    STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index.");
+                    result += entry.second * pomdpActionRewardVector[choiceIndex];
+                }
+                return result;
+            }
+
+            uint32_t getBeliefObservation(BeliefId beliefId) {
+                return getBeliefObservation(getBelief(beliefId));
+            }
+
+            uint64_t getBeliefNumberOfChoices(BeliefId beliefId) {
+                auto belief = getBelief(beliefId);
+                return pomdp.getNumberOfChoices(belief.begin()->first);
+            }
+
+            Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) {
+                return triangulateBelief(getBelief(beliefId), resolution);
+            }
+
+            template<typename DistributionType>
+            void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) {
+                auto insertionRes = distr.emplace(state, value);
+                if (!insertionRes.second) {
+                    insertionRes.first->second += value;
+                }
+            }
+
+            BeliefId getNumberOfBeliefIds() const {
+                return beliefs.size();
+            }
+
+            std::vector<std::pair<BeliefId, ValueType>> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
+                return expandInternal(beliefId, actionIndex, observationResolutions);
+            }
+
+            std::vector<std::pair<BeliefId, ValueType>> expand(BeliefId const& beliefId, uint64_t actionIndex) {
+                return expandInternal(beliefId, actionIndex);
+            }
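+            // Hypothetical usage sketch of the public interface (editor's illustration; the
+            // identifier `pomdp`, the chosen precision/resolution, and the default template
+            // arguments are assumptions, not taken from this patch):
+            //     storm::storage::BeliefManager<storm::models::sparse::Pomdp<double>> manager(pomdp, 1e-9);
+            //     BeliefId initialId = manager.getInitialBelief();
+            //     std::vector<uint64_t> resolutions(pomdp.getNrObservations(), 8); // one resolution per observation
+            //     for (uint64_t a = 0; a < manager.getBeliefNumberOfChoices(initialId); ++a) {
+            //         for (auto const& successor : manager.expandAndTriangulate(initialId, a, resolutions)) {
+            //             // successor.first: BeliefId of a grid point, successor.second: probability of reaching it
+            //         }
+            //     }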
+
+        private:
+
+            BeliefType const& getBelief(BeliefId const& id) const {
+                STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existent belief.");
+                STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range.");
+                return beliefs[id];
+            }
+
+            BeliefId getId(BeliefType const& belief) const {
+                auto idIt = beliefToIdMap.find(belief);
+                STORM_LOG_THROW(idIt != beliefToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown Belief.");
+                return idIt->second;
+            }
+
+            std::string toString(BeliefType const& belief) const {
+                std::stringstream str;
+                str << "{ ";
+                bool first = true;
+                for (auto const& entry : belief) {
+                    if (first) {
+                        first = false;
+                    } else {
+                        str << ", ";
+                    }
+                    str << entry.first << ": " << entry.second;
+                }
+                str << " }";
+                return str.str();
+            }
+
+            bool isEqual(BeliefType const& first, BeliefType const& second) const {
+                if (first.size() != second.size()) {
+                    return false;
+                }
+                auto secondIt = second.begin();
+                for (auto const& firstEntry : first) {
+                    if (firstEntry.first != secondIt->first) {
+                        return false;
+                    }
+                    if (!cc.isEqual(firstEntry.second, secondIt->second)) {
+                        return false;
+                    }
+                    ++secondIt;
+                }
+                return true;
+            }
+
+            bool assertBelief(BeliefType const& belief) const {
+                BeliefValueType sum = storm::utility::zero<BeliefValueType>();
+                boost::optional<uint64_t> observation;
+                for (auto const& entry : belief) {
+                    if (entry.first >= pomdp.getNumberOfStates()) {
+                        STORM_LOG_ERROR("Belief refers to the non-existing POMDP state " << entry.first << ".");
+                        return false;
+                    }
+                    uint64_t entryObservation = pomdp.getObservation(entry.first);
+                    if (observation) {
+                        if (observation.get() != entryObservation) {
+                            STORM_LOG_ERROR("Belief support contains different observations.");
+                            return false;
+                        }
+                    } else {
+                        observation = entryObservation;
+                    }
+                    if (cc.isZero(entry.second)) {
+                        // We assume that beliefs only consider their support.
+                        STORM_LOG_ERROR("Zero belief probability.");
+                        return false;
+                    }
+                    if (cc.isLess(entry.second, storm::utility::zero<BeliefValueType>())) {
+                        STORM_LOG_ERROR("Negative belief probability.");
+                        return false;
+                    }
+                    if (cc.isLess(storm::utility::one<BeliefValueType>(), entry.second)) {
+                        STORM_LOG_ERROR("Belief probability greater than one.");
+                        return false;
+                    }
+                    sum += entry.second;
+                }
+                if (!cc.isOne(sum)) {
+                    STORM_LOG_ERROR("Belief does not sum up to one.");
+                    return false;
+                }
+                return true;
+            }
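+            // Note (editor's summary): assertTriangulation below checks that the triangulation
+            // is a convex combination that reconstructs the original belief, i.e. with grid
+            // points g_i and weights w_i it verifies (up to the precision of cc)
+            //     sum_i w_i = 1   and   sum_i w_i * g_i(s) = b(s)  for every state s.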
STORM_LOG_ERROR("Triangulation weights do not sum up to one."); + return false; + } + if (!assertBelief(triangulatedBelief)) { + STORM_LOG_ERROR("Triangulated belief is not a belief."); + } + if (!isEqual(belief, triangulatedBelief)) { + STORM_LOG_ERROR("Belief:\n\t" << toString(belief) << "\ndoes not match triangulated belief:\n\t" << toString(triangulatedBelief) << "."); + return false; + } + return true; + } + + uint32_t getBeliefObservation(BeliefType belief) { + STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief."); + return pomdp.getObservation(belief.begin()->first); + } + + struct FreudenthalDiff { + FreudenthalDiff(StateType const& dimension, BeliefValueType&& diff) : dimension(dimension), diff(std::move(diff)) { }; + StateType dimension; // i + BeliefValueType diff; // d[i] + bool operator>(FreudenthalDiff const& other) const { + if (diff != other.diff) { + return diff > other.diff; + } else { + return dimension < other.dimension; + } + } + }; + + Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { + STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); + StateType numEntries = belief.size(); + Triangulation result; + + // Quickly triangulate Dirac beliefs + if (numEntries == 1u) { + result.weights.push_back(storm::utility::one()); + result.gridPoints.push_back(getOrAddBeliefId(belief)); + } else { + + auto convResolution = storm::utility::convertNumber(resolution); + // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) + // Variable names are mostly based on the paper + // However, we speed this up a little by exploiting that belief states usually have sparse support (i.e. numEntries is much smaller than pomdp.getNumberOfStates()). + // Initialize diffs and the first row of the 'qs' matrix (aka v) + std::set> sorted_diffs; // d (and p?) 
+
+            Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
+                STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid.");
+                StateType numEntries = belief.size();
+                Triangulation result;
+
+                // Quickly triangulate Dirac beliefs
+                if (numEntries == 1u) {
+                    result.weights.push_back(storm::utility::one<BeliefValueType>());
+                    result.gridPoints.push_back(getOrAddBeliefId(belief));
+                } else {
+
+                    auto convResolution = storm::utility::convertNumber<BeliefValueType>(resolution);
+                    // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
+                    // Variable names are mostly based on the paper
+                    // However, we speed this up a little by exploiting that belief states usually have sparse support (i.e. numEntries is much smaller than pomdp.getNumberOfStates()).
+                    // Initialize diffs and the first row of the 'qs' matrix (aka v)
+                    std::set<FreudenthalDiff, std::greater<FreudenthalDiff>> sorted_diffs; // d (and p?) in the paper
+                    std::vector<BeliefValueType> qsRow; // Row of the 'qs' matrix from the paper (initially corresponds to v)
+                    qsRow.reserve(numEntries);
+                    std::vector<StateType> toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices
+                    toOriginalIndicesMap.reserve(numEntries);
+                    BeliefValueType x = convResolution;
+                    for (auto const& entry : belief) {
+                        qsRow.push_back(storm::utility::floor(x)); // v
+                        sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v
+                        toOriginalIndicesMap.push_back(entry.first);
+                        x -= entry.second * convResolution;
+                    }
+                    // Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler
+                    qsRow.push_back(storm::utility::zero<BeliefValueType>());
+
+                    result.weights.reserve(numEntries);
+                    result.gridPoints.reserve(numEntries);
+                    auto currentSortedDiff = sorted_diffs.begin();
+                    auto previousSortedDiff = sorted_diffs.end();
+                    --previousSortedDiff;
+                    for (StateType i = 0; i < numEntries; ++i) {
+                        // Compute the weight for the grid points
+                        BeliefValueType weight = previousSortedDiff->diff - currentSortedDiff->diff;
+                        if (i == 0) {
+                            // The first weight is a bit different
+                            weight += storm::utility::one<BeliefValueType>();
+                        } else {
+                            // 'compute' the next row of the qs matrix
+                            qsRow[previousSortedDiff->dimension] += storm::utility::one<BeliefValueType>();
+                        }
+                        if (!cc.isZero(weight)) {
+                            result.weights.push_back(weight);
+                            // Compute the grid point
+                            BeliefType gridPoint;
+                            for (StateType j = 0; j < numEntries; ++j) {
+                                BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1];
+                                if (!cc.isZero(gridPointEntry)) {
+                                    gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / convResolution;
+                                }
+                            }
+                            result.gridPoints.push_back(getOrAddBeliefId(gridPoint));
+                        }
+                        previousSortedDiff = currentSortedDiff++;
+                    }
+                }
+                STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation: " << toString(result));
+                return result;
+            }
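+            // Worked example (editor's, not from the patch), resolution N = 4 and the
+            // two-state belief b = (0.35, 0.65):
+            //     x takes the values (4, 2.6) (scaled tail sums of b), v = floor(x) = (4, 2),
+            //     d = x - v = (0, 0.6); sorted diffs: dimension 1 (0.6), then dimension 0 (0).
+            //     i = 0: weight = 1 + (0 - 0.6) = 0.4, qs row (4, 2, 0) -> grid point (2/4, 2/4);
+            //     i = 1: weight = 0.6 - 0 = 0.6,       qs row (4, 3, 0) -> grid point (1/4, 3/4).
+            //     Check: 0.4 * (0.5, 0.5) + 0.6 * (0.25, 0.75) = (0.35, 0.65) = b.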
+
+            std::vector<std::pair<BeliefId, ValueType>> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
+                std::vector<std::pair<BeliefId, ValueType>> destinations;
+
+                BeliefType belief = getBelief(beliefId);
+
+                // Find the probability we go to each observation
+                BeliefType successorObs; // This is actually not a belief but has the same type
+                for (auto const& pointEntry : belief) {
+                    uint64_t state = pointEntry.first;
+                    for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
+                        if (!storm::utility::isZero(pomdpTransition.getValue())) {
+                            auto obs = pomdp.getObservation(pomdpTransition.getColumn());
+                            addToDistribution(successorObs, obs, pointEntry.second * pomdpTransition.getValue());
+                        }
+                    }
+                }
+
+                // Now for each successor observation we find and potentially triangulate the successor belief
+                for (auto const& successor : successorObs) {
+                    BeliefType successorBelief;
+                    for (auto const& pointEntry : belief) {
+                        uint64_t state = pointEntry.first;
+                        for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
+                            if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) {
+                                ValueType prob = pointEntry.second * pomdpTransition.getValue() / successor.second;
+                                addToDistribution(successorBelief, pomdpTransition.getColumn(), prob);
+                            }
+                        }
+                    }
+                    STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief.");
+
+                    // Insert the destination. We know that destinations have to be disjoint since they have different observations
+                    if (observationTriangulationResolutions) {
+                        Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.get()[successor.first]);
+                        for (size_t j = 0; j < triangulation.size(); ++j) {
+                            // Here we additionally assume that triangulation.gridPoints does not contain the same point multiple times
+                            destinations.emplace_back(triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
+                        }
+                    } else {
+                        destinations.emplace_back(getOrAddBeliefId(successorBelief), successor.second);
+                    }
+                }
+
+                return destinations;
+
+            }
+
+            BeliefId computeInitialBelief() {
+                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
+                                 "POMDP contains more than one initial state");
+                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
+                                 "POMDP does not contain an initial state");
+                BeliefType belief;
+                belief[*pomdp.getInitialStates().begin()] = storm::utility::one<BeliefValueType>();
+
+                STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief.");
+                return getOrAddBeliefId(belief);
+            }
+
+            BeliefId getOrAddBeliefId(BeliefType const& belief) {
+                auto insertionRes = beliefToIdMap.emplace(belief, beliefs.size());
+                if (insertionRes.second) {
+                    // There actually was an insertion, so add the new belief
+                    beliefs.push_back(belief);
+                }
+                // Return the id
+                return insertionRes.first->second;
+            }
+
+            struct BeliefHash {
+                std::size_t operator()(const BeliefType& belief) const {
+                    std::size_t seed = 0;
+                    // Assumes that beliefs are ordered
+                    for (auto const& entry : belief) {
+                        boost::hash_combine(seed, entry.first);
+                        boost::hash_combine(seed, entry.second);
+                    }
+                    return seed;
+                }
+            };
+
+            PomdpType const& pomdp;
+            std::vector<ValueType> pomdpActionRewardVector;
+
+            std::vector<BeliefType> beliefs;
+            std::unordered_map<BeliefType, BeliefId, BeliefHash> beliefToIdMap;
+            BeliefId initialBeliefId;
+
+            storm::utility::ConstantsComparator<ValueType> cc;
+
+
+        };
+    }
+}
\ No newline at end of file
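Editor's note on the belief update implemented by expandInternal above (notation mine, not part of the patch): for a belief b, action a and observation o, with observation function O,

    P(o | b, a)  =  sum_s b(s) * sum_{s' : O(s') = o} P(s' | s, a)
    b_{a,o}(s')  =  [ O(s') = o ] * sum_s b(s) * P(s' | s, a) / P(o | b, a)

expand returns one pair (b_{a,o}, P(o | b, a)) per observation o reached with positive probability; expandAndTriangulate additionally replaces each b_{a,o} by its grid points, scaling the triangulation weights with P(o | b, a).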
diff --git a/src/storm/models/sparse/StandardRewardModel.cpp b/src/storm/models/sparse/StandardRewardModel.cpp
index 159c39446..b017ae58a 100644
--- a/src/storm/models/sparse/StandardRewardModel.cpp
+++ b/src/storm/models/sparse/StandardRewardModel.cpp
@@ -349,11 +349,6 @@ namespace storm {
             return result;
         }
 
-        template<typename ValueType>
-        void StandardRewardModel<ValueType>::setStateActionRewardValue(uint_fast64_t row, ValueType const& value) {
-            this->optionalStateActionRewardVector.get()[row] = value;
-        }
-
         template<typename ValueType>
         template<typename MatrixValueType>
         void StandardRewardModel<ValueType>::clearRewardAtState(uint_fast64_t state, storm::storage::SparseMatrix<MatrixValueType> const& transitions) {
diff --git a/src/storm/models/sparse/StandardRewardModel.h b/src/storm/models/sparse/StandardRewardModel.h
index 2bfed8143..aedd7c535 100644
--- a/src/storm/models/sparse/StandardRewardModel.h
+++ b/src/storm/models/sparse/StandardRewardModel.h
@@ -287,15 +287,6 @@ namespace storm {
             template<typename MatrixValueType>
             storm::storage::BitVector getChoicesWithFilter(storm::storage::SparseMatrix<MatrixValueType> const& transitionMatrix, std::function<bool(ValueType const&)> const& filter) const;
 
-            /*!
-             * Sets the given value in the state-action reward vector at the given row. This assumes that the reward
-             * model has state-action rewards.
-             *
-             * @param row The row at which to set the given value.
-             * @param value The value to set.
-             */
-            void setStateActionRewardValue(uint_fast64_t row, ValueType const& value);
-
             /*!
              * Retrieves whether the reward model is empty, i.e. contains no state-, state-action- or transition-based
              * rewards.
diff --git a/src/storm/settings/SettingsManager.cpp b/src/storm/settings/SettingsManager.cpp
index bff3aa37d..027a24df6 100644
--- a/src/storm/settings/SettingsManager.cpp
+++ b/src/storm/settings/SettingsManager.cpp
@@ -403,7 +403,7 @@ namespace storm {
             STORM_LOG_THROW(moduleIterator == this->modules.end(), storm::exceptions::IllegalFunctionCallException, "Unable to register module '" << moduleSettings->getModuleName() << "' because a module with the same name already exists.");
 
             // Take over the module settings object.
-            std::string const& moduleName = moduleSettings->getModuleName();
+            std::string moduleName = moduleSettings->getModuleName();
             this->moduleNames.push_back(moduleName);
             this->modules.emplace(moduleSettings->getModuleName(), std::move(moduleSettings));
             auto iterator = this->modules.find(moduleName);
diff --git a/src/storm/storage/Distribution.cpp b/src/storm/storage/Distribution.cpp
index f40afb402..2290c611c 100644
--- a/src/storm/storage/Distribution.cpp
+++ b/src/storm/storage/Distribution.cpp
@@ -166,7 +166,17 @@ namespace storm {
             }
         }
 
-
+        template<typename ValueType, typename StateType>
+        void Distribution<ValueType, StateType>::normalize() {
+            ValueType sum = storm::utility::zero<ValueType>();
+            for (auto const& entry: distribution) {
+                sum += entry.second;
+            }
+            for (auto& entry: distribution) {
+                entry.second /= sum;
+            }
+        }
+
         template class Distribution<double>;
         template std::ostream& operator<<(std::ostream& out, Distribution<double> const& distribution);
diff --git a/src/storm/storage/Distribution.h b/src/storm/storage/Distribution.h
index d7e0bd2fb..c3ac58dcc 100644
--- a/src/storm/storage/Distribution.h
+++ b/src/storm/storage/Distribution.h
@@ -144,6 +144,11 @@ namespace storm {
              */
             ValueType getProbability(StateType const& state) const;
 
+            /*!
+             * Normalizes the distribution such that the values sum up to one.
+             */
+            void normalize();
+
         private:
             // A list of states and the probabilities that are assigned to them.
             container_type distribution;
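Editor's usage sketch for the new Distribution::normalize (hypothetical values; note that it assumes a non-empty distribution with positive total mass, since every entry is divided by the sum):

    storm::storage::Distribution<double> d;
    d.addProbability(0, 0.2); // unnormalized weights
    d.addProbability(3, 0.6);
    d.normalize();            // d now assigns 0.25 to state 0 and 0.75 to state 3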
diff --git a/src/storm/storage/dd/cudd/InternalCuddBdd.cpp b/src/storm/storage/dd/cudd/InternalCuddBdd.cpp
index a850d6329..647c6b7ce 100644
--- a/src/storm/storage/dd/cudd/InternalCuddBdd.cpp
+++ b/src/storm/storage/dd/cudd/InternalCuddBdd.cpp
@@ -196,7 +196,7 @@ namespace storm {
             // Open the file, dump the DD and close it again.
             std::vector<cudd::BDD> cuddBddVector = { this->getCuddBdd() };
-            FILE* filePointer = fopen(filename.c_str() , "w");
+            FILE* filePointer = fopen(filename.c_str() , "a+");
             if (showVariablesIfPossible) {
                 ddManager->getCuddManager().DumpDot(cuddBddVector, ddVariableNames.data(), &ddNames[0], filePointer);
             } else {
diff --git a/src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp b/src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp
index 3907f0625..1f30f23fb 100644
--- a/src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp
+++ b/src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp
@@ -410,7 +410,7 @@ namespace storm {
                     if (isStateReachable(modelState, memoryState)) {
                         if (scheduler && scheduler->getChoice(modelState, memoryState).isDefined()) {
                             ValueType factor = scheduler->getChoice(modelState, memoryState).getChoiceAsDistribution().getProbability(rowOffset);
-                            stateActionRewards.get()[resultTransitionMatrix.getRowGroupIndices()[getResultState(modelState, memoryState)]] = factor * modelStateActionReward;
+                            stateActionRewards.get()[resultTransitionMatrix.getRowGroupIndices()[getResultState(modelState, memoryState)]] += factor * modelStateActionReward;
                         } else {
                             stateActionRewards.get()[resultTransitionMatrix.getRowGroupIndices()[getResultState(modelState, memoryState)] + rowOffset] = modelStateActionReward;
                         }
diff --git a/src/storm/utility/vector.h b/src/storm/utility/vector.h
index dd562a8e1..af6098038 100644
--- a/src/storm/utility/vector.h
+++ b/src/storm/utility/vector.h
@@ -142,6 +142,12 @@ namespace storm {
             return true;
         }
 
+        template<typename T, typename Comparator>
+        bool compareElementWise(std::vector<T> const& left, std::vector<T> const& right, Comparator comp = std::less<T>()) {
+            STORM_LOG_ASSERT(left.size() == right.size(), "Expected that vectors for comparison have equal size");
+            return std::equal(left.begin(), left.end(), right.begin(), comp);
+        }
+
         /*!
          * Selects the elements from a vector at the specified positions and writes them consecutively into another vector.
         * @param vector The vector into which the selected elements are to be written.
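Editor's note on the new compareElementWise helper: std::equal with a binary predicate applies it pairwise, so the function returns true iff comp(left[i], right[i]) holds at every position i. A usage sketch (my example; the comparator is passed explicitly):

    std::vector<int> a = {1, 4, 7};
    std::vector<int> b = {2, 5, 9};
    storm::utility::vector::compareElementWise(a, b, std::less<int>());    // true: a[i] < b[i] everywhere
    storm::utility::vector::compareElementWise(a, b, std::greater<int>()); // false: a[i] > b[i] holds nowhere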