From 48395f121884256fe2982afb6e986b027a48b91a Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Sun, 29 Mar 2020 16:11:24 +0200 Subject: [PATCH 01/40] Cmake: Fixed capitalization of z3 and hwloc to silence some cmake warnings that recently appear. --- resources/3rdparty/CMakeLists.txt | 2 +- .../cmake/find_modules/{FindHwloc.cmake => FindHWLOC.cmake} | 0 resources/cmake/find_modules/FindZ3.cmake | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename resources/cmake/find_modules/{FindHwloc.cmake => FindHWLOC.cmake} (100%) diff --git a/resources/3rdparty/CMakeLists.txt b/resources/3rdparty/CMakeLists.txt index 46e927d85..acd1fefd1 100644 --- a/resources/3rdparty/CMakeLists.txt +++ b/resources/3rdparty/CMakeLists.txt @@ -496,7 +496,7 @@ add_dependencies(sylvan_STATIC sylvan) list(APPEND STORM_DEP_TARGETS sylvan_STATIC) -find_package(Hwloc QUIET REQUIRED) +find_package(HWLOC QUIET REQUIRED) if(HWLOC_FOUND) message(STATUS "Storm - Linking with hwloc ${HWLOC_VERSION}.") add_imported_library(hwloc STATIC ${HWLOC_LIBRARIES} "") diff --git a/resources/cmake/find_modules/FindHwloc.cmake b/resources/cmake/find_modules/FindHWLOC.cmake similarity index 100% rename from resources/cmake/find_modules/FindHwloc.cmake rename to resources/cmake/find_modules/FindHWLOC.cmake diff --git a/resources/cmake/find_modules/FindZ3.cmake b/resources/cmake/find_modules/FindZ3.cmake index d0978b3bc..6c6a7beb1 100644 --- a/resources/cmake/find_modules/FindZ3.cmake +++ b/resources/cmake/find_modules/FindZ3.cmake @@ -32,7 +32,7 @@ set(Z3_SOLVER ${Z3_EXEC}) # set the LIBZ3_FOUND variable by utilizing the following macro # (which also handles the REQUIRED and QUIET arguments) include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(z3 DEFAULT_MSG +find_package_handle_standard_args(Z3 DEFAULT_MSG Z3_LIBRARY Z3_INCLUDE_DIR) IF (NOT Z3_FIND_QUIETLY) From 1603f0569de88a81d74d1efb43021aad8ddaf3c3 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Sun, 29 Mar 2020 20:27:45 +0200 
Subject: [PATCH 02/40] Silenced a gcc warning. --- src/storm-cli-utilities/model-handling.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/storm-cli-utilities/model-handling.h b/src/storm-cli-utilities/model-handling.h index 0e4fe4812..6dad6fbb0 100644 --- a/src/storm-cli-utilities/model-handling.h +++ b/src/storm-cli-utilities/model-handling.h @@ -276,6 +276,7 @@ namespace storm { case ModelProcessingInformation::ValueType::FinitePrecision: return storm::utility::canHandle(mpi.engine, input.preprocessedProperties.is_initialized() ? input.preprocessedProperties.get() : input.properties, input.model.get()); } + return false; }; mpi.isCompatible = checkCompatibleSettings(); if (!mpi.isCompatible) { From dc7aabc2f1c8a5dd8dd55ba6fcc09bd0e55a593e Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Sun, 29 Mar 2020 20:28:26 +0200 Subject: [PATCH 03/40] Fixed moving a reference away. --- src/storm/settings/SettingsManager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storm/settings/SettingsManager.cpp b/src/storm/settings/SettingsManager.cpp index 6a694d09d..da236433c 100644 --- a/src/storm/settings/SettingsManager.cpp +++ b/src/storm/settings/SettingsManager.cpp @@ -404,7 +404,7 @@ namespace storm { STORM_LOG_THROW(moduleIterator == this->modules.end(), storm::exceptions::IllegalFunctionCallException, "Unable to register module '" << moduleSettings->getModuleName() << "' because a module with the same name already exists."); // Take over the module settings object. 
- std::string const& moduleName = moduleSettings->getModuleName(); + std::string moduleName = moduleSettings->getModuleName(); this->moduleNames.push_back(moduleName); this->modules.emplace(moduleSettings->getModuleName(), std::move(moduleSettings)); auto iterator = this->modules.find(moduleName); From 743dc3e8b149318d315780e1f47f29fa58a19ee9 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 30 Mar 2020 09:01:05 +0200 Subject: [PATCH 04/40] Cmake: Silence some cmake warnings that recently appear (part 2) --- resources/3rdparty/CMakeLists.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/resources/3rdparty/CMakeLists.txt b/resources/3rdparty/CMakeLists.txt index acd1fefd1..2a5f13dab 100644 --- a/resources/3rdparty/CMakeLists.txt +++ b/resources/3rdparty/CMakeLists.txt @@ -170,8 +170,8 @@ if(Z3_FOUND) set(STORM_Z3_API_USES_STANDARD_INTEGERS ON) endif() - add_imported_library(z3 SHARED ${Z3_LIBRARIES} ${Z3_INCLUDE_DIRS}) - list(APPEND STORM_DEP_TARGETS z3_SHARED) + add_imported_library(Z3 SHARED ${Z3_LIBRARIES} ${Z3_INCLUDE_DIRS}) + list(APPEND STORM_DEP_TARGETS Z3_SHARED) else() message(WARNING "Storm - Could not obtain Z3 version. 
Building of Prism/JANI models will not be supported.") set(Z3_FOUND FALSE) @@ -197,13 +197,13 @@ include(${STORM_3RDPARTY_SOURCE_DIR}/include_glpk.cmake) ############################################################# if (STORM_USE_GUROBI) - find_package(Gurobi QUIET REQUIRED) + find_package(GUROBI QUIET REQUIRED) set(STORM_HAVE_GUROBI ${GUROBI_FOUND}) if (GUROBI_FOUND) if (EXISTS ${GUROBI_LIBRARY}) message (STATUS "Storm - Linking with Gurobi (${GUROBI_CXX_LIBRARY}).") - add_imported_library(Gurobi SHARED ${GUROBI_LIBRARY} ${GUROBI_INCLUDE_DIRS}) - list(APPEND STORM_DEP_TARGETS Gurobi_SHARED) + add_imported_library(GUROBI SHARED ${GUROBI_LIBRARY} ${GUROBI_INCLUDE_DIRS}) + list(APPEND STORM_DEP_TARGETS GUROBI_SHARED) else() # The FindGurobi.cmake script needs to be updated every now and then as the library file contains the version number... message(FATAL_ERROR "Gurobi Library ${GUROBI_LIBRARY} not found. If your Gurobi Version is higher then 9.0.0, please contact the Storm developers.") @@ -499,8 +499,8 @@ list(APPEND STORM_DEP_TARGETS sylvan_STATIC) find_package(HWLOC QUIET REQUIRED) if(HWLOC_FOUND) message(STATUS "Storm - Linking with hwloc ${HWLOC_VERSION}.") - add_imported_library(hwloc STATIC ${HWLOC_LIBRARIES} "") - list(APPEND STORM_DEP_TARGETS hwloc_STATIC) + add_imported_library(HWLOC STATIC ${HWLOC_LIBRARIES} "") + list(APPEND STORM_DEP_TARGETS HWLOC_STATIC) else() if(${OPERATING_SYSTEM} MATCHES "Linux") message(FATAL_ERROR "HWLOC is required on Linux but was not found.") From 98bb48d3c54e2d35b607c00f07036ea1caf5f9b0 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 30 Mar 2020 12:07:49 +0200 Subject: [PATCH 05/40] BeliefGrid: Adding support for rewards. 
--- src/storm-pomdp/storage/BeliefGrid.h | 33 +++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefGrid.h index b0661fcc5..61fe3b370 100644 --- a/src/storm-pomdp/storage/BeliefGrid.h +++ b/src/storm-pomdp/storage/BeliefGrid.h @@ -24,6 +24,19 @@ namespace storm { // Intentionally left empty } + void setRewardModel(boost::optional rewardModelName = boost::none) { + if (rewardModelName) { + auto const& rewardModel = pomdp.getRewardModel(rewardModelName.get()); + pomdpActionRewardVector = rewardModel.getTotalRewardVector(pomdp.getTransitionMatrix()); + } else { + setRewardModel(pomdp.getUniqueRewardModelName()); + } + } + + void unsetRewardModel() { + pomdpActionRewardVector.clear(); + } + struct Triangulation { std::vector gridPoints; std::vector weights; @@ -89,7 +102,11 @@ namespace storm { BeliefValueType sum = storm::utility::zero(); boost::optional observation; for (auto const& entry : belief) { - uintmax_t entryObservation = pomdp.getObservation(entry.first); + if (entry.first >= pomdp.getNumberOfStates()) { + STORM_LOG_ERROR("Belief does refer to non-existing pomdp state " << entry.first << "."); + return false; + } + uint64_t entryObservation = pomdp.getObservation(entry.first); if (observation) { if (observation.get() != entryObservation) { STORM_LOG_ERROR("Beliefsupport contains different observations."); @@ -176,6 +193,19 @@ namespace storm { return getOrAddGridPointId(belief); } + ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const { + STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified."); + auto result = storm::utility::zero(); + auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); + for (auto const &entry : belief) { + uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex; + STORM_LOG_ASSERT(choiceIndex < 
choiceIndices[entry.first + 1], "Invalid local action index."); + STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index."); + result += entry.second * pomdpActionRewardVector[choiceIndex]; + } + return result; + } + uint32_t getBeliefObservation(BeliefType belief) { STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief."); return pomdp.getObservation(belief.begin()->first); @@ -343,6 +373,7 @@ namespace storm { } PomdpType const& pomdp; + std::vector pomdpActionRewardVector; std::vector gridPoints; std::map gridPointToIdMap; From a3e92d2f72952e759effe1cf765f7d7cad87a2a9 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 30 Mar 2020 12:18:06 +0200 Subject: [PATCH 06/40] Using the new reward functionalities of BliefGrid. This also fixes setting rewards in a wrong way (previously, the same reward was assigned to states with the same observation). --- .../ApproximatePOMDPModelchecker.cpp | 71 ++++++++++--------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index fb5264ca8..3aeb81d18 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -137,7 +137,28 @@ namespace storm { stream << "##########################################" << std::endl; } - + + std::shared_ptr createStandardProperty(bool min, bool computeRewards) { + std::string propertyString = computeRewards ? "R" : "P"; + propertyString += min ? "min" : "max"; + propertyString += "=? 
[F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + return storm::api::extractFormulasFromProperties(propertyVector).front(); + } + + template + storm::modelchecker::CheckTask createStandardCheckTask(std::shared_ptr& property, std::vector&& hintVector) { + //Note: The property should not run out of scope after calling this because the task only stores the property by reference. + // Therefore, this method needs the property by reference (and not const reference) + auto task = storm::api::createTask(property, false); + if (!hintVector.empty()) { + auto hint = storm::modelchecker::ExplicitModelCheckerHint(); + hint.setResultHint(std::move(hintVector)); + auto hintPtr = std::make_shared>(hint); + task.setHint(hintPtr); + } + return task; + } template std::unique_ptr> @@ -360,6 +381,10 @@ namespace storm { } storm::storage::BeliefGrid> beliefGrid(pomdp, options.numericPrecision); + if (computeRewards) { + beliefGrid.setRewardModel(); + } + bsmap_type beliefStateMap; std::deque beliefsToBeExpanded; @@ -520,37 +545,27 @@ namespace storm { storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); auto overApproxMdp = std::make_shared>(std::move(modelComponents)); if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow)); + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow, storm::utility::zero())); for (auto const &iter : beliefStateMap.left) { if (fullyExpandedStates.get(iter.second)) { - auto currentBelief = beliefGrid.getGridPoint(iter.first); + auto const& currentBelief = beliefGrid.getGridPoint(iter.first); auto representativeState = currentBelief.begin()->first; for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { - // Add the reward uint64_t mdpChoice = 
overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); - mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); + mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action)); } } } overApproxMdp->addRewardModel("default", mdpRewardModel); - overApproxMdp->restrictRewardModels(std::set({"default"})); } statistics.overApproximationBuildTime.stop(); STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); overApproxMdp->printModelInformationToStream(std::cout); auto modelPtr = std::static_pointer_cast>(overApproxMdp); - std::string propertyString = computeRewards ? "R" : "P"; - propertyString += min ? "min" : "max"; - propertyString += "=? [F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - auto task = storm::api::createTask(property, false); - auto hint = storm::modelchecker::ExplicitModelCheckerHint(); - hint.setResultHint(hintVector); - auto hintPtr = std::make_shared>(hint); - task.setHint(hintPtr); + auto property = createStandardProperty(min, computeRewards); + auto task = createStandardCheckTask(property, std::move(hintVector)); + statistics.overApproximationCheckTime.start(); std::unique_ptr res(storm::api::verifyWithSparseEngine(overApproxMdp, task)); statistics.overApproximationCheckTime.stop(); @@ -1172,16 +1187,14 @@ namespace storm { storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); auto model = std::make_shared>(std::move(modelComponents)); if (computeRewards) { - storm::models::sparse::StandardRewardModel 
mdpRewardModel(boost::none, std::vector(mdpMatrixRow)); + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow, storm::utility::zero())); for (auto const &iter : beliefStateMap.left) { if (fullyExpandedStates.get(iter.second)) { - auto currentBelief = beliefGrid.getGridPoint(iter.first); + auto const& currentBelief = beliefGrid.getGridPoint(iter.first); auto representativeState = currentBelief.begin()->first; for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { - // Add the reward uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); - mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); + mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action)); } } } @@ -1192,17 +1205,11 @@ namespace storm { model->printModelInformationToStream(std::cout); statistics.underApproximationBuildTime.stop(); - std::string propertyString; - if (computeRewards) { - propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; - } else { - propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? 
[F \"target\"]"; - } - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - + auto property = createStandardProperty(min, computeRewards); + auto task = createStandardCheckTask(property, std::vector()); + statistics.underApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, false))); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); statistics.underApproximationCheckTime.stop(); if (storm::utility::resources::isTerminate() && !res) { return nullptr; From 87c855531219755df5920538dca0c7755462a1b0 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 30 Mar 2020 12:20:13 +0200 Subject: [PATCH 07/40] Using the new reward functionalities of BliefGrid. This also fixes setting rewards in a wrong way (previously, the same reward was assigned to states with the same observation). Added auxiliary functions for creating properties. 
--- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 3aeb81d18..eac2dad1b 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -1207,8 +1207,8 @@ namespace storm { auto property = createStandardProperty(min, computeRewards); auto task = createStandardCheckTask(property, std::vector()); - statistics.underApproximationCheckTime.start(); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); statistics.underApproximationCheckTime.stop(); if (storm::utility::resources::isTerminate() && !res) { From 0b552e68132ed8028d9eb3f27e87f41ead80edfc Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Mon, 30 Mar 2020 13:13:40 +0200 Subject: [PATCH 08/40] Renamed BeliefGrid to BeliefManager --- .../ApproximatePOMDPModelchecker.cpp | 69 +++++++------- .../ApproximatePOMDPModelchecker.h | 4 +- .../storage/{BeliefGrid.h => BeliefManager.h} | 91 ++++++++++--------- 3 files changed, 85 insertions(+), 79 deletions(-) rename src/storm-pomdp/storage/{BeliefGrid.h => BeliefManager.h} (87%) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index eac2dad1b..6728535f5 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -380,9 +380,9 @@ namespace storm { underMap = underApproximationMap.value(); } - storm::storage::BeliefGrid> beliefGrid(pomdp, options.numericPrecision); + auto beliefManager = std::make_shared>>(pomdp, options.numericPrecision); if (computeRewards) { - beliefGrid.setRewardModel(); + beliefManager->setRewardModel(); // TODO: get actual name } bsmap_type beliefStateMap; @@ -391,9 +391,7 
@@ namespace storm { statistics.overApproximationBuildTime.start(); // Initial belief always has belief ID 0 - auto initialBeliefId = beliefGrid.getInitialBelief(); - auto const& initialBelief = beliefGrid.getGridPoint(initialBeliefId); - auto initialObservation = beliefGrid.getBeliefObservation(initialBelief); + auto initialObservation = beliefManager->getBeliefObservation(beliefManager->getInitialBelief()); // These are the components to build the MDP from the grid // Reserve states 0 and 1 as always sink/goal states storm::storage::SparseMatrixBuilder mdpTransitionsBuilder(0, 0, 0, true, true); @@ -419,14 +417,14 @@ namespace storm { std::map weightedSumUnderMap; // for the initial belief, add the triangulated initial states - auto triangulation = beliefGrid.triangulateBelief(initialBelief, observationResolutionVector[initialObservation]); + auto triangulation = beliefManager->triangulateBelief(beliefManager->getInitialBelief(), observationResolutionVector[initialObservation]); uint64_t initialMdpState = nextMdpStateId; ++nextMdpStateId; if (triangulation.size() == 1) { // The initial belief is on the grid itself auto initBeliefId = triangulation.gridPoints.front(); if (boundMapsSet) { - auto const& gridPoint = beliefGrid.getGridPoint(initBeliefId); + auto const& gridPoint = beliefManager->getBelief(initBeliefId); weightedSumOverMap[initBeliefId] = getWeightedSum(gridPoint, overMap); weightedSumUnderMap[initBeliefId] = getWeightedSum(gridPoint, underMap); } @@ -443,7 +441,7 @@ namespace storm { beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId)); ++nextMdpStateId; if (boundMapsSet) { - auto const& gridPoint = beliefGrid.getGridPoint(triangulation.gridPoints[i]); + auto const& gridPoint = beliefManager->getBelief(triangulation.gridPoints[i]); weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, overMap); weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, underMap); } @@ -458,7 
+456,7 @@ namespace storm { if (options.explorationThreshold > storm::utility::zero()) { STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } - storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false); + storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false); for (auto const& belId : beliefsToBeExpanded) { foundBeliefs.set(belId, true); } @@ -467,7 +465,7 @@ namespace storm { beliefsToBeExpanded.pop_front(); uint64_t currMdpState = beliefStateMap.left.at(currId); - uint32_t currObservation = beliefGrid.getBeliefObservation(currId); + uint32_t currObservation = beliefManager->getBeliefObservation(currId); mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); @@ -484,17 +482,17 @@ namespace storm { } else { fullyExpandedStates.grow(nextMdpStateId, false); fullyExpandedStates.set(currMdpState, true); - uint64_t someState = beliefGrid.getGridPoint(currId).begin()->first; + uint64_t someState = beliefManager->getBelief(currId).begin()->first; uint64_t numChoices = pomdp.getNumberOfChoices(someState); for (uint64_t action = 0; action < numChoices; ++action) { - auto successorGridPoints = beliefGrid.expandAndTriangulate(currId, action, observationResolutionVector); + auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); // Check for newly found grid points - foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false); + foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false); for (auto const& successor : successorGridPoints) { auto successorId = successor.first; - auto const& successorBelief = beliefGrid.getGridPoint(successorId); - auto successorObservation = beliefGrid.getBeliefObservation(successorBelief); + auto const& successorBelief = beliefManager->getBelief(successorId); + auto successorObservation = beliefManager->getBeliefObservation(successorBelief); if (!foundBeliefs.get(successorId)) { 
foundBeliefs.set(successorId); beliefsToBeExpanded.push_back(successorId); @@ -548,11 +546,11 @@ namespace storm { storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow, storm::utility::zero())); for (auto const &iter : beliefStateMap.left) { if (fullyExpandedStates.get(iter.second)) { - auto const& currentBelief = beliefGrid.getGridPoint(iter.first); + auto const& currentBelief = beliefManager->getBelief(iter.first); auto representativeState = currentBelief.begin()->first; for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action)); + mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action)); } } } @@ -575,16 +573,16 @@ namespace storm { STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true))); auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; + auto overApprox = overApproxResultMap[initialMdpState]; STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." 
<< std::endl); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(beliefGrid, targetObservations, min, computeRewards, maxUaModelSize); + auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize); if (storm::utility::resources::isTerminate() && !underApproxComponents) { // TODO: return other components needed for refinement. //return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); - return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, initialBeliefId}); + return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()}); } STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); @@ -595,7 +593,7 @@ namespace storm { underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); */ return std::make_unique>(RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, initialBeliefId}); + underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()}); } @@ -1075,7 +1073,7 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeUnderapproximation(storm::storage::BeliefGrid>& beliefGrid, + ApproximatePOMDPModelchecker::computeUnderapproximation(std::shared_ptr>> beliefManager, std::set const &targetObservations, bool min, bool computeRewards, uint64_t 
maxModelSize) { // Build the belief MDP until enough states are explored. @@ -1100,13 +1098,12 @@ namespace storm { bsmap_type beliefStateMap; std::deque beliefsToBeExpanded; - auto initialBeliefId = beliefGrid.getInitialBelief(); - beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, nextMdpStateId)); - beliefsToBeExpanded.push_back(initialBeliefId); + beliefStateMap.insert(bsmap_type::value_type(beliefManager->getInitialBelief(), nextMdpStateId)); + beliefsToBeExpanded.push_back(beliefManager->getInitialBelief()); ++nextMdpStateId; // Expand the believes - storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false); + storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false); for (auto const& belId : beliefsToBeExpanded) { foundBeliefs.set(belId, true); } @@ -1115,8 +1112,8 @@ namespace storm { beliefsToBeExpanded.pop_front(); uint64_t currMdpState = beliefStateMap.left.at(currId); - auto const& currBelief = beliefGrid.getGridPoint(currId); - uint32_t currObservation = beliefGrid.getBeliefObservation(currBelief); + auto const& currBelief = beliefManager->getBelief(currId); + uint32_t currObservation = beliefManager->getBeliefObservation(currBelief); mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); @@ -1147,9 +1144,9 @@ namespace storm { uint64_t someState = currBelief.begin()->first; uint64_t numChoices = pomdp.getNumberOfChoices(someState); for (uint64_t action = 0; action < numChoices; ++action) { - auto successorBeliefs = beliefGrid.expand(currId, action); + auto successorBeliefs = beliefManager->expand(currId, action); // Check for newly found beliefs - foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false); + foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false); for (auto const& successor : successorBeliefs) { auto successorId = successor.first; if (!foundBeliefs.get(successorId)) { @@ -1159,7 +1156,7 @@ namespace storm { ++nextMdpStateId; } auto successorMdpState = 
beliefStateMap.left.at(successorId); - // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted. + // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorBeliefs are sorted. mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second); } ++mdpMatrixRow; @@ -1179,7 +1176,7 @@ namespace storm { storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBeliefId)); + mdpLabeling.addLabelToState("init", beliefStateMap.left.at(beliefManager->getInitialBelief())); for (auto targetState : targetStates) { mdpLabeling.addLabelToState("target", targetState); } @@ -1190,11 +1187,11 @@ namespace storm { storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow, storm::utility::zero())); for (auto const &iter : beliefStateMap.left) { if (fullyExpandedStates.get(iter.second)) { - auto const& currentBelief = beliefGrid.getGridPoint(iter.first); + auto const& currentBelief = beliefManager->getBelief(iter.first); auto representativeState = currentBelief.begin()->first; for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action)); + mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action)); } } } @@ -1207,8 +1204,8 @@ namespace storm { auto property = createStandardProperty(min, computeRewards); auto task = createStandardCheckTask(property, std::vector()); - statistics.underApproximationCheckTime.start(); + statistics.underApproximationCheckTime.start(); 
std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); statistics.underApproximationCheckTime.stop(); if (storm::utility::resources::isTerminate() && !res) { @@ -1217,7 +1214,7 @@ namespace storm { STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true))); auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; + auto underApprox = underApproxResultMap[beliefStateMap.left.at(beliefManager->getInitialBelief())]; return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 32daa0876..a97d36cb2 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -3,7 +3,7 @@ #include "storm/models/sparse/Pomdp.h" #include "storm/utility/logging.h" #include "storm-pomdp/storage/Belief.h" -#include "storm-pomdp/storage/BeliefGrid.h" +#include "storm-pomdp/storage/BeliefManager.h" #include #include "storm/storage/jani/Property.h" @@ -163,7 +163,7 @@ namespace storm { std::set const &targetObservations, uint64_t initialBeliefId, bool min, bool computeReward, uint64_t maxModelSize); - std::unique_ptr> computeUnderapproximation(storm::storage::BeliefGrid>& beliefGrid, + std::unique_ptr> computeUnderapproximation(std::shared_ptr>> beliefManager, std::set const &targetObservations, bool min, bool computeReward, uint64_t maxModelSize); diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefManager.h similarity index 87% rename from src/storm-pomdp/storage/BeliefGrid.h rename to src/storm-pomdp/storage/BeliefManager.h index 61fe3b370..efc6bee2a 100644 --- 
a/src/storm-pomdp/storage/BeliefGrid.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -11,8 +11,7 @@ namespace storm { namespace storage { template - // TODO: Change name. This actually does not only manage grid points. - class BeliefGrid { + class BeliefManager { public: typedef typename PomdpType::ValueType ValueType; @@ -20,8 +19,8 @@ namespace storm { typedef std::map BeliefType; typedef uint64_t BeliefId; - BeliefGrid(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { - // Intentionally left empty + BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { + initialBeliefId = computeInitialBelief(); } void setRewardModel(boost::optional rewardModelName = boost::none) { @@ -45,13 +44,13 @@ namespace storm { } }; - BeliefType const& getGridPoint(BeliefId const& id) const { - return gridPoints[id]; + BeliefType const& getBelief(BeliefId const& id) const { + return beliefs[id]; } - BeliefId getIdOfGridPoint(BeliefType const& gridPoint) const { - auto idIt = gridPointToIdMap.find(gridPoint); - STORM_LOG_THROW(idIt != gridPointToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown grid state."); + BeliefId getId(BeliefType const& belief) const { + auto idIt = beliefToIdMap.find(belief); + STORM_LOG_THROW(idIt != beliefToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown Belief."); return idIt->second; } @@ -75,7 +74,7 @@ namespace storm { std::stringstream str; str << "(\n"; for (uint64_t i = 0; i < t.size(); ++i) { - str << "\t" << t.weights[i] << " * \t" << toString(getGridPoint(t.gridPoints[i])) << "\n"; + str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n"; } str <<")\n"; return str.str(); @@ -161,7 +160,7 @@ namespace storm { STORM_LOG_ERROR("Weight greater than one in triangulation."); } weightSum += triangulation.weights[i]; - BeliefType const& gridPoint = getGridPoint(triangulation.gridPoints[i]); + 
BeliefType const& gridPoint = getBelief(triangulation.gridPoints[i]); for (auto const& pointEntry : gridPoint) { BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero()).first->second; triangulatedValue += triangulation.weights[i] * pointEntry.second; @@ -181,16 +180,8 @@ namespace storm { return true; } - BeliefId getInitialBelief() { - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, - "POMDP contains more than one initial state"); - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, - "POMDP does not contain an initial state"); - BeliefType belief; - belief[*pomdp.getInitialStates().begin()] = storm::utility::one(); - - STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief."); - return getOrAddGridPointId(belief); + BeliefId const& getInitialBelief() const { + return initialBeliefId; } ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const { @@ -212,10 +203,10 @@ namespace storm { } uint32_t getBeliefObservation(BeliefId beliefId) { - return getBeliefObservation(getGridPoint(beliefId)); + return getBeliefObservation(getBelief(beliefId)); } - + Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { //TODO this can also be simplified using the sparse vector interpretation //TODO Enable chaching for this method? 
@@ -286,7 +277,7 @@ namespace storm { if (!cc.isZero(qsj[nrStates - 1])) { gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution; } - result.gridPoints.push_back(getOrAddGridPointId(gridPoint)); + result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); } } @@ -294,6 +285,10 @@ namespace storm { return result; } + Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) { + return triangulateBelief(getBelief(beliefId), resolution); + } + template void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { auto insertionRes = distr.emplace(state, value); @@ -302,19 +297,19 @@ namespace storm { } } - BeliefId getNumberOfGridPointIds() const { - return gridPoints.size(); + BeliefId getNumberOfBeliefIds() const { + return beliefs.size(); } - std::map expandInternal(BeliefId const& gridPointId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { + std::map expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { std::map destinations; // The belief ids should be ordered // TODO: Does this make sense? 
It could be better to order them afterwards because now we rely on the fact that MDP states have the same order than their associated BeliefIds - BeliefType gridPoint = getGridPoint(gridPointId); + BeliefType belief = getBelief(beliefId); // Find the probability we go to each observation BeliefType successorObs; // This is actually not a belief but has the same type - for (auto const& pointEntry : gridPoint) { + for (auto const& pointEntry : belief) { uint64_t state = pointEntry.first; for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { if (!storm::utility::isZero(pomdpTransition.getValue())) { @@ -327,7 +322,7 @@ namespace storm { // Now for each successor observation we find and potentially triangulate the successor belief for (auto const& successor : successorObs) { BeliefType successorBelief; - for (auto const& pointEntry : gridPoint) { + for (auto const& pointEntry : belief) { uint64_t state = pointEntry.first; for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) { @@ -344,7 +339,7 @@ namespace storm { addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second); } } else { - addToDistribution(destinations, getOrAddGridPointId(successorBelief), successor.second); + addToDistribution(destinations, getOrAddBeliefId(successorBelief), successor.second); } } @@ -352,21 +347,33 @@ namespace storm { } - std::map expandAndTriangulate(BeliefId const& gridPointId, uint64_t actionIndex, std::vector const& observationResolutions) { - return expandInternal(gridPointId, actionIndex, observationResolutions); + std::map expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { + return expandInternal(beliefId, actionIndex, observationResolutions); } - std::map expand(BeliefId const& gridPointId, uint64_t actionIndex) { - 
return expandInternal(gridPointId, actionIndex); + std::map expand(BeliefId const& beliefId, uint64_t actionIndex) { + return expandInternal(beliefId, actionIndex); } private: - BeliefId getOrAddGridPointId(BeliefType const& gridPoint) { - auto insertioRes = gridPointToIdMap.emplace(gridPoint, gridPoints.size()); + BeliefId computeInitialBelief() { + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, + "POMDP contains more than one initial state"); + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, + "POMDP does not contain an initial state"); + BeliefType belief; + belief[*pomdp.getInitialStates().begin()] = storm::utility::one(); + + STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief."); + return getOrAddBeliefId(belief); + } + + BeliefId getOrAddBeliefId(BeliefType const& belief) { + auto insertioRes = beliefToIdMap.emplace(belief, beliefs.size()); if (insertioRes.second) { - // There actually was an insertion, so add the new grid state - gridPoints.push_back(gridPoint); + // There actually was an insertion, so add the new belief + beliefs.push_back(belief); } // Return the id return insertioRes.first->second; @@ -375,8 +382,10 @@ namespace storm { PomdpType const& pomdp; std::vector pomdpActionRewardVector; - std::vector gridPoints; - std::map gridPointToIdMap; + std::vector beliefs; + std::map beliefToIdMap; + BeliefId initialBeliefId; + storm::utility::ConstantsComparator cc; From 41199ea5c7b14868568deb4dbffbf20487d65294 Mon Sep 17 00:00:00 2001 From: Matthias Volk Date: Mon, 30 Mar 2020 14:55:21 +0200 Subject: [PATCH 09/40] Append in dot output for DDs --- src/storm/storage/dd/cudd/InternalCuddBdd.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storm/storage/dd/cudd/InternalCuddBdd.cpp b/src/storm/storage/dd/cudd/InternalCuddBdd.cpp index a850d6329..647c6b7ce 100644 --- a/src/storm/storage/dd/cudd/InternalCuddBdd.cpp +++ b/src/storm/storage/dd/cudd/InternalCuddBdd.cpp @@ -196,7 +196,7 
@@ namespace storm { // Open the file, dump the DD and close it again. std::vector cuddBddVector = { this->getCuddBdd() }; - FILE* filePointer = fopen(filename.c_str() , "w"); + FILE* filePointer = fopen(filename.c_str() , "a+"); if (showVariablesIfPossible) { ddManager->getCuddManager().DumpDot(cuddBddVector, ddVariableNames.data(), &ddNames[0], filePointer); } else { From b8ac41f561b175c8469f85d75c6cb9148253112b Mon Sep 17 00:00:00 2001 From: Matthias Volk Date: Mon, 30 Mar 2020 18:11:13 +0200 Subject: [PATCH 10/40] Fixed problem with stormpy by changing boost::optional arguments to const& in GSPNs --- src/storm-gspn/storage/gspn/GspnBuilder.cpp | 4 ++-- src/storm-gspn/storage/gspn/GspnBuilder.h | 4 ++-- src/storm-gspn/storage/gspn/Place.cpp | 2 +- src/storm-gspn/storage/gspn/Place.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/storm-gspn/storage/gspn/GspnBuilder.cpp b/src/storm-gspn/storage/gspn/GspnBuilder.cpp index c39b5c0f7..41ef64f96 100644 --- a/src/storm-gspn/storage/gspn/GspnBuilder.cpp +++ b/src/storm-gspn/storage/gspn/GspnBuilder.cpp @@ -13,7 +13,7 @@ namespace storm { gspnName = name; } - uint_fast64_t GspnBuilder::addPlace(boost::optional capacity, uint_fast64_t const& initialTokens, std::string const& name) { + uint_fast64_t GspnBuilder::addPlace(boost::optional const& capacity, uint_fast64_t const& initialTokens, std::string const& name) { auto newId = places.size(); auto place = storm::gspn::Place(newId); place.setCapacity(capacity); @@ -70,7 +70,7 @@ namespace storm { return addTimedTransition(priority, rate, 1, name); } - uint_fast64_t GspnBuilder::addTimedTransition(uint_fast64_t const &priority, double const &rate, boost::optional numServers, std::string const& name) { + uint_fast64_t GspnBuilder::addTimedTransition(uint_fast64_t const &priority, double const &rate, boost::optional const& numServers, std::string const& name) { auto trans = storm::gspn::TimedTransition(); auto newId = 
GSPN::timedTransitionIdToTransitionId(timedTransitions.size()); trans.setName(name); diff --git a/src/storm-gspn/storage/gspn/GspnBuilder.h b/src/storm-gspn/storage/gspn/GspnBuilder.h index 5e4db4a34..fb4cd2ad7 100644 --- a/src/storm-gspn/storage/gspn/GspnBuilder.h +++ b/src/storm-gspn/storage/gspn/GspnBuilder.h @@ -25,7 +25,7 @@ namespace storm { * A capacity of -1 indicates an unbounded place. * @param initialTokens The number of inital tokens in the place. */ - uint_fast64_t addPlace(boost::optional capacity = 1, uint_fast64_t const& initialTokens = 0, std::string const& name = ""); + uint_fast64_t addPlace(boost::optional const& capacity = 1, uint_fast64_t const& initialTokens = 0, std::string const& name = ""); void setPlaceLayoutInfo(uint64_t placeId, LayoutInfo const& layoutInfo); @@ -51,7 +51,7 @@ namespace storm { * @param rate The rate for the transition. * @param numServers The number of servers this transition has (in case of K-Server semantics) or boost::none (in case of Infinite-Server-Semantics). 
*/ - uint_fast64_t addTimedTransition(uint_fast64_t const &priority, RateType const& rate, boost::optional numServers, std::string const& name = ""); + uint_fast64_t addTimedTransition(uint_fast64_t const &priority, RateType const& rate, boost::optional const& numServers, std::string const& name = ""); void setTransitionLayoutInfo(uint64_t transitionId, LayoutInfo const& layoutInfo); diff --git a/src/storm-gspn/storage/gspn/Place.cpp b/src/storm-gspn/storage/gspn/Place.cpp index ca9a4a9e1..8d7347936 100644 --- a/src/storm-gspn/storage/gspn/Place.cpp +++ b/src/storm-gspn/storage/gspn/Place.cpp @@ -29,7 +29,7 @@ namespace storm { return this->numberOfInitialTokens; } - void Place::setCapacity(boost::optional cap) { + void Place::setCapacity(boost::optional const& cap) { this->capacity = cap; } diff --git a/src/storm-gspn/storage/gspn/Place.h b/src/storm-gspn/storage/gspn/Place.h index 55c9da6fb..b6a8b4ca1 100644 --- a/src/storm-gspn/storage/gspn/Place.h +++ b/src/storm-gspn/storage/gspn/Place.h @@ -56,7 +56,7 @@ namespace storm { * @param capacity The capacity of this place. A non-negative number represents the capacity. * boost::none indicates that the flag is not set. */ - void setCapacity(boost::optional capacity); + void setCapacity(boost::optional const& capacity); /*! * Returns the capacity of tokens of this place. 
From 37da2b4e1fae34993c05c6153735fc433ad65dfa Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 31 Mar 2020 13:58:45 +0200 Subject: [PATCH 11/40] Added a new model checker that allows to compute trivial (but sound) bounds on the value of POMDP states --- .../TrivialPomdpValueBoundsModelChecker.h | 115 ++++++++++++++++++ src/storm/storage/Distribution.cpp | 12 +- src/storm/storage/Distribution.h | 5 + 3 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h diff --git a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h new file mode 100644 index 000000000..862a82a05 --- /dev/null +++ b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h @@ -0,0 +1,115 @@ +#pragma once + +#include "storm-pomdp/analysis/FormulaInformation.h" + +#include "storm/api/verification.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/models/sparse/StandardRewardModel.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/storage/Scheduler.h" + +#include "storm/utility/macros.h" +#include "storm/exceptions/UnexpectedException.h" +#include "storm/exceptions/NotSupportedException.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + template + class TrivialPomdpValueBoundsModelChecker { + public: + typedef typename PomdpType::ValueType ValueType; + TrivialPomdpValueBoundsModelChecker(PomdpType const& pomdp) : pomdp(pomdp) { + // Intentionally left empty + } + + struct ValueBounds { + std::vector lower; + std::vector upper; + }; + ValueBounds getValueBounds(storm::logic::Formula const& formula) { + return getValueBounds(formula, storm::pomdp::analysis::getFormulaInformation(pomdp, formula)); + } + + ValueBounds getValueBounds(storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info) { + 
STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis."); + // Compute the values on the fully observable MDP + // We need an actual MDP here so that the apply scheduler method below will work. + // Also, the api call in the next line will require a copy anyway. + auto underlyingMdp = std::make_shared>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels()); + auto resultPtr = storm::api::verifyWithSparseEngine(underlyingMdp, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + std::vector fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult().getValueVector()); + + // Create some positional scheduler for the POMDP + storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); + // For each state, we heuristically find a good distribution over output actions. 
+ std::vector fullyObservableChoiceValues(pomdp.getNumberOfChoices()); + if (info.isNonNestedExpectedRewardFormula()) { + std::vector actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix()); + pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues, &actionBasedRewards); + } else { + pomdp.getTransitionMatrix().multiplyWithVector(fullyObservableResult, fullyObservableChoiceValues); + } + auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); + for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { + auto obsStates = pomdp.getStatesWithObservation(obs); + storm::storage::Distribution choiceDistribution; + for (auto const &state : obsStates) { + ValueType const& stateValue = fullyObservableResult[state]; + assert(stateValue >= storm::utility::zero()); + for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) { + ValueType const& choiceValue = fullyObservableChoiceValues[choice]; + assert(choiceValue >= storm::utility::zero()); + // Rate this choice by considering the relative difference between the choice value and the (optimal) state value + ValueType choiceRating; + if (stateValue < choiceValue) { + choiceRating = choiceValue - stateValue; + if (!storm::utility::isZero(choiceValue)) { + choiceRating /= choiceValue; + } + } else { + choiceRating = stateValue - choiceValue; + if (!storm::utility::isZero(stateValue)) { + choiceRating /= stateValue; + } + } + assert(choiceRating <= storm::utility::one()); + assert(choiceRating >= storm::utility::zero()); + // choiceRating = 0 is a very good choice, choiceRating = 1 is a very bad choice + if (choiceRating <= storm::utility::convertNumber(0.5)) { + choiceDistribution.addProbability(choice - choiceIndices[state], storm::utility::one() - choiceRating); + } + } + } + choiceDistribution.normalize(); + for (auto const& state : obsStates) { + 
pomdpScheduler.setChoice(choiceDistribution, state); + } + } + auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false); + + auto resultPtr2 = storm::api::verifyWithSparseEngine(scheduledModel, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr2, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr2->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + std::vector pomdpSchedulerResult = std::move(resultPtr2->template asExplicitQuantitativeCheckResult().getValueVector()); + + // Finally prepare the result + ValueBounds result; + if (info.minimize()) { + result.lower = std::move(fullyObservableResult); + result.upper = std::move(pomdpSchedulerResult); + } else { + result.lower = std::move(pomdpSchedulerResult); + result.upper = std::move(fullyObservableResult); + } + return result; + } + + private: + PomdpType const& pomdp; + }; + } + } +} \ No newline at end of file diff --git a/src/storm/storage/Distribution.cpp b/src/storm/storage/Distribution.cpp index f40afb402..2290c611c 100644 --- a/src/storm/storage/Distribution.cpp +++ b/src/storm/storage/Distribution.cpp @@ -166,7 +166,17 @@ namespace storm { } } - + template + void Distribution::normalize() { + ValueType sum = storm::utility::zero(); + for (auto const& entry: distribution) { + sum += entry.second; + } + for (auto& entry: distribution) { + entry.second /= sum; + } + } + template class Distribution; template std::ostream& operator<<(std::ostream& out, Distribution const& distribution); diff --git a/src/storm/storage/Distribution.h b/src/storm/storage/Distribution.h index d7e0bd2fb..c3ac58dcc 100644 --- a/src/storm/storage/Distribution.h +++ b/src/storm/storage/Distribution.h @@ -144,6 +144,11 @@ namespace storm { */ ValueType getProbability(StateType const& state) const; + /*! + * Normalizes the distribution such that the values sum up to one. 
+ */ + void normalize(); + private: // A list of states and the probabilities that are assigned to them. container_type distribution; From ab26b6943558e711bb5a7e2c064908914ca444ff Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 31 Mar 2020 14:01:00 +0200 Subject: [PATCH 12/40] Added BeliefMdpExplorer which does most of the work when exploring (triangulated Variants of) the BeliefMdp. --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 354 ++++++++++++++++++ .../ApproximatePOMDPModelchecker.cpp | 248 ++++-------- .../ApproximatePOMDPModelchecker.h | 9 +- src/storm-pomdp/storage/BeliefManager.h | 24 +- 4 files changed, 443 insertions(+), 192 deletions(-) create mode 100644 src/storm-pomdp/builder/BeliefMdpExplorer.h diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h new file mode 100644 index 000000000..107f699ae --- /dev/null +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -0,0 +1,354 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "storm/api/properties.h" +#include "storm/api/verification.h" + +#include "storm/storage/BitVector.h" +#include "storm/utility/macros.h" +#include "storm-pomdp/storage/BeliefManager.h" +#include "storm/utility/SignalHandler.h" + +namespace storm { + namespace builder { + template + class BeliefMdpExplorer { + public: + typedef typename PomdpType::ValueType ValueType; + typedef storm::storage::BeliefManager BeliefManagerType; + typedef typename BeliefManagerType::BeliefId BeliefId; + typedef uint64_t MdpStateType; + + BeliefMdpExplorer(std::shared_ptr beliefManager, std::vector const& pomdpLowerValueBounds, std::vector const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) { + // Intentionally left empty + } + + void startNewExploration(boost::optional extraTargetStateValue = boost::none, boost::optional extraBottomStateValue = boost::none) { + // 
Reset data from potential previous explorations + mdpStateToBeliefIdMap.clear(); + beliefIdToMdpStateMap.clear(); + beliefIdsWithMdpState.clear(); + beliefIdsWithMdpState.grow(beliefManager->getNumberOfBeliefIds(), false); + lowerValueBounds.clear(); + upperValueBounds.clear(); + values.clear(); + mdpTransitionsBuilder = storm::storage::SparseMatrixBuilder(0, 0, 0, true, true); + currentRowCount = 0; + startOfCurrentRowGroup = 0; + mdpActionRewards.clear(); + exploredMdp = nullptr; + + // Add some states with special treatment (if requested) + if (extraBottomStateValue) { + extraBottomState = getCurrentNumberOfMdpStates(); + mdpStateToBeliefIdMap.push_back(beliefManager->noId()); + insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get()); + + startOfCurrentRowGroup = currentRowCount; + mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); + mdpTransitionsBuilder.addNextValue(currentRowCount, extraBottomState.get(), storm::utility::one()); + ++currentRowCount; + } else { + extraBottomState = boost::none; + } + if (extraTargetStateValue) { + extraTargetState = getCurrentNumberOfMdpStates(); + mdpStateToBeliefIdMap.push_back(beliefManager->noId()); + insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get()); + + startOfCurrentRowGroup = currentRowCount; + mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); + mdpTransitionsBuilder.addNextValue(currentRowCount, extraTargetState.get(), storm::utility::one()); + ++currentRowCount; + + targetStates.grow(getCurrentNumberOfMdpStates(), false); + targetStates.set(extraTargetState.get(), true); + } else { + extraTargetState = boost::none; + } + + // Set up the initial state. 
+ initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); + } + + bool hasUnexploredState() const { + return !beliefIdsToExplore.empty(); + } + + BeliefId exploreNextState() { + // Set up the matrix builder + finishCurrentRow(); + startOfCurrentRowGroup = currentRowCount; + mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); + ++currentRowCount; + + // Pop from the queue. + auto result = beliefIdsToExplore.front(); + beliefIdsToExplore.pop_front(); + return result; + } + + void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero(), ValueType const& bottomStateValue = storm::utility::zero()) { + // We first insert the entries of the current row in a separate map. + // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) + + uint64_t row = startOfCurrentRowGroup + localActionIndex; + if (!storm::utility::isZero(bottomStateValue)) { + STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); + internalAddTransition(row, extraBottomState.get(), bottomStateValue); + } + if (!storm::utility::isZero(targetStateValue)) { + STORM_LOG_ASSERT(extraTargetState.is_initialized(), "Requested a transition to the extra target state but there is none."); + internalAddTransition(row, extraTargetState.get(), targetStateValue); + } + } + + void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one()) { + uint64_t row = startOfCurrentRowGroup + localActionIndex; + internalAddTransition(row, getCurrentMdpState(), value); + } + + /*! + * Adds the next transition to the given successor belief + * @param localActionIndex + * @param transitionTarget + * @param value + * @param ignoreNewBeliefs If true, beliefs that were not found before are not inserted, i.e. we might not insert the transition. 
+ * @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true. + */ + bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) { + // We first insert the entries of the current row in a separate map. + // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) + MdpStateType column; + if (ignoreNewBeliefs) { + column = getMdpState(transitionTarget); + if (column == noState()) { + return false; + } + } else { + column = getOrAddMdpState(transitionTarget); + } + uint64_t row = startOfCurrentRowGroup + localActionIndex; + internalAddTransition(row, column, value); + return true; + } + + void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero()) { + if (currentRowCount >= mdpActionRewards.size()) { + mdpActionRewards.resize(currentRowCount, storm::utility::zero()); + } + uint64_t row = startOfCurrentRowGroup + localActionIndex; + mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward; + } + + void setCurrentStateIsTarget() { + targetStates.grow(getCurrentNumberOfMdpStates(), false); + targetStates.set(getCurrentMdpState(), true); + } + + void setCurrentStateIsTruncated() { + truncatedStates.grow(getCurrentNumberOfMdpStates(), false); + truncatedStates.set(getCurrentMdpState(), true); + } + + void finishExploration() { + // Create the tranistion matrix + finishCurrentRow(); + auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates()); + + // Create a standard labeling + storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates()); + mdpLabeling.addLabel("init"); + mdpLabeling.addLabelToState("init", initialMdpState); + targetStates.resize(getCurrentNumberOfMdpStates(), 
false); + mdpLabeling.addLabel("target", std::move(targetStates)); + truncatedStates.resize(getCurrentNumberOfMdpStates(), false); + mdpLabeling.addLabel("truncated", std::move(truncatedStates)); + + // Create a standard reward model (if rewards are available) + std::unordered_map> mdpRewardModels; + if (!mdpActionRewards.empty()) { + mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); + mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(mdpActionRewards))); + } + + storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); + exploredMdp = std::make_shared>(std::move(modelComponents)); + } + + std::shared_ptr> getExploredMdp() const { + STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); + return exploredMdp; + } + + MdpStateType getCurrentNumberOfMdpStates() const { + return mdpStateToBeliefIdMap.size(); + } + + MdpStateType getCurrentNumberOfMdpChoices() const { + return currentRowCount; + } + + ValueType getLowerValueBoundAtCurrentState() const { + return lowerValueBounds[getCurrentMdpState()]; + } + + ValueType getUpperValueBoundAtCurrentState() const { + return upperValueBounds[getCurrentMdpState()]; + } + + ValueType computeLowerValueBoundAtBelief(BeliefId const& beliefId) const { + return beliefManager->getWeightedSum(beliefId, pomdpLowerValueBounds); + } + + ValueType computeUpperValueBoundAtBelief(BeliefId const& beliefId) const { + return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds); + } + + std::vector const& computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) { + STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored"); + auto property = createStandardProperty(dir, exploredMdp->hasRewardModel()); + auto task = createStandardCheckTask(property); + + std::unique_ptr 
res(storm::api::verifyWithSparseEngine(exploredMdp, task)); + if (res) { + values = std::move(res->asExplicitQuantitativeCheckResult().getValueVector()); + } else { + STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!"); + STORM_LOG_ERROR("No result obtained while checking."); + } + return values; + } + + ValueType const& getComputedValueAtInitialState() const { + STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored."); + return values[exploredMdp->getInitialStates().getNextSetIndex(0)]; + } + + private: + MdpStateType noState() const { + return std::numeric_limits::max(); + } + + std::shared_ptr createStandardProperty(storm::solver::OptimizationDirection const& dir, bool computeRewards) { + std::string propertyString = computeRewards ? "R" : "P"; + propertyString += storm::solver::minimize(dir) ? "min" : "max"; + propertyString += "=? [F \"target\"]"; + std::vector propertyVector = storm::api::parseProperties(propertyString); + return storm::api::extractFormulasFromProperties(propertyVector).front(); + } + + storm::modelchecker::CheckTask createStandardCheckTask(std::shared_ptr& property) { + //Note: The property should not run out of scope after calling this because the task only stores the property by reference. + // Therefore, this method needs the property by reference (and not const reference) + auto task = storm::api::createTask(property, false); + auto hint = storm::modelchecker::ExplicitModelCheckerHint(); + hint.setResultHint(values); + auto hintPtr = std::make_shared>(hint); + task.setHint(hintPtr); + return task; + } + + MdpStateType getCurrentMdpState() const { + return mdpTransitionsBuilder.getCurrentRowGroupCount() - 1; + } + + MdpStateType getCurrentBeliefId() const { + return mdpStateToBeliefIdMap[getCurrentMdpState()]; + } + + void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) { + // We first insert the entries of the current row in a separate map. 
+ // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) + STORM_LOG_ASSERT(row >= currentRowCount - 1, "Trying to insert in an already completed row."); + if (row >= currentRowCount) { + // We are going to start a new row, so insert the entries of the old one + finishCurrentRow(); + currentRowCount = row + 1; + } + STORM_LOG_ASSERT(mdpTransitionsBuilderCurrentRowEntries.count(column) == 0, "Trying to insert multiple transitions to the same state."); + mdpTransitionsBuilderCurrentRowEntries[column] = value; + } + + void finishCurrentRow() { + for (auto const& entry : mdpTransitionsBuilderCurrentRowEntries) { + mdpTransitionsBuilder.addNextValue(currentRowCount - 1, entry.first, entry.second); + } + mdpTransitionsBuilderCurrentRowEntries.clear(); + } + + MdpStateType getMdpState(BeliefId const& beliefId) const { + if (beliefId < beliefIdsWithMdpState.size() && beliefIdsWithMdpState.get(beliefId)) { + return beliefIdToMdpStateMap.at(beliefId); + } else { + return noState(); + } + } + + void insertValueHints(ValueType const& lowerBound, ValueType const& upperBound) { + lowerValueBounds.push_back(lowerBound); + upperValueBounds.push_back(upperBound); + // Take the middle value as a hint + values.push_back((lowerBound + upperBound) / storm::utility::convertNumber(2)); + STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have different size then number of available states."); + STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent size."); + } + + MdpStateType getOrAddMdpState(BeliefId const& beliefId) { + beliefIdsWithMdpState.grow(beliefId + 1, false); + if (beliefIdsWithMdpState.get(beliefId)) { + return beliefIdToMdpStateMap[beliefId]; + } else { + // Add a new MDP state + beliefIdsWithMdpState.set(beliefId, true); + MdpStateType result = getCurrentNumberOfMdpStates(); + 
assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size()); + mdpStateToBeliefIdMap.push_back(beliefId); + beliefIdToMdpStateMap[beliefId] = result; + // This new belief needs exploration + beliefIdsToExplore.push_back(beliefId); + + insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId)); + return result; + } + } + + // Belief state related information + std::shared_ptr beliefManager; + std::vector mdpStateToBeliefIdMap; + std::map beliefIdToMdpStateMap; + storm::storage::BitVector beliefIdsWithMdpState; + + // Exploration information + std::deque beliefIdsToExplore; + storm::storage::SparseMatrixBuilder mdpTransitionsBuilder; + std::map mdpTransitionsBuilderCurrentRowEntries; + std::vector mdpActionRewards; + uint64_t startOfCurrentRowGroup; + uint64_t currentRowCount; + + // Special states during exploration + boost::optional extraTargetState; + boost::optional extraBottomState; + storm::storage::BitVector targetStates; + storm::storage::BitVector truncatedStates; + MdpStateType initialMdpState; + + // Final Mdp + std::shared_ptr> exploredMdp; + + // Value related information + std::vector const& pomdpLowerValueBounds; + std::vector const& pomdpUpperValueBounds; + std::vector lowerValueBounds; + std::vector upperValueBounds; + std::vector values; // Contains an estimate during building and the actual result after a check has performed + + }; + } +} \ No newline at end of file diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 6728535f5..42f1872f7 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -22,7 +22,8 @@ #include "storm/api/properties.h" #include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" - +#include "storm-pomdp/builder/BeliefMdpExplorer.h" +#include 
"storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h" #include "storm/utility/macros.h" #include "storm/utility/SignalHandler.h" @@ -57,6 +58,10 @@ namespace storm { std::unique_ptr> result; // Extract the relevant information from the formula auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); + + // Compute some initial bounds on the values for each state of the pomdp + auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker>(pomdp).getValueBounds(formula, formulaInfo); + if (formulaInfo.isNonNestedReachabilityProbability()) { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); @@ -68,7 +73,7 @@ namespace storm { if (options.doRefinement) { result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false); } else { - result = computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper); } } else if (formulaInfo.isNonNestedExpectedRewardFormula()) { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. 
@@ -78,7 +83,7 @@ namespace storm { } else { // FIXME: pick the non-unique reward model here STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet."); - result = computeReachabilityRewardOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize()); + result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper); } } else { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); @@ -233,8 +238,8 @@ namespace storm { uint64_t refinementCounter = 1; STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl) std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, - initialOverApproxMap, - initialUnderApproxMap, underApproxModelSize); + {}, + {}, underApproxModelSize); if (res == nullptr) { statistics.refinementSteps = 0; return nullptr; @@ -335,14 +340,14 @@ namespace storm { template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, bool computeRewards, - boost::optional> overApproximationMap, - boost::optional> underApproximationMap, + std::vector const& lowerPomdpValueBounds, + std::vector const& upperPomdpValueBounds, uint64_t maxUaModelSize) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, overApproximationMap, - underApproximationMap, maxUaModelSize); + std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); + auto result = computeFirstRefinementStep(targetObservations, min, 
observationResolutionVector, computeRewards, lowerPomdpValueBounds, + upperPomdpValueBounds, maxUaModelSize); if (result == nullptr) { return nullptr; } @@ -353,8 +358,6 @@ namespace storm { } } - - template ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) { ValueType result = storm::utility::zero(); @@ -369,155 +372,64 @@ namespace storm { ApproximatePOMDPModelchecker::computeFirstRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, bool computeRewards, - boost::optional> overApproximationMap, - boost::optional> underApproximationMap, + std::vector const& lowerPomdpValueBounds, + std::vector const& upperPomdpValueBounds, uint64_t maxUaModelSize) { - bool boundMapsSet = overApproximationMap && underApproximationMap; - std::map overMap; - std::map underMap; - if (boundMapsSet) { - overMap = overApproximationMap.value(); - underMap = underApproximationMap.value(); - } - + auto beliefManager = std::make_shared>>(pomdp, options.numericPrecision); if (computeRewards) { beliefManager->setRewardModel(); // TODO: get actual name } - bsmap_type beliefStateMap; - - std::deque beliefsToBeExpanded; - statistics.overApproximationBuildTime.start(); - // Initial belief always has belief ID 0 - auto initialObservation = beliefManager->getBeliefObservation(beliefManager->getInitialBelief()); - // These are the components to build the MDP from the grid - // Reserve states 0 and 1 as always sink/goal states - storm::storage::SparseMatrixBuilder mdpTransitionsBuilder(0, 0, 0, true, true); - uint64_t extraBottomState = 0; - uint64_t extraTargetState = computeRewards ? 
0 : 1; - uint64_t nextMdpStateId = extraTargetState + 1; - uint64_t mdpMatrixRow = 0; - for (uint64_t state = 0; state < nextMdpStateId; ++state) { - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one()); - ++mdpMatrixRow; - } - // Hint vector for the MDP modelchecker (initialize with constant sink/goal values) - std::vector hintVector(nextMdpStateId, storm::utility::zero()); - if (!computeRewards) { - hintVector[extraTargetState] = storm::utility::one(); - } - std::vector targetStates = {extraTargetState}; - storm::storage::BitVector fullyExpandedStates; - - // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace - std::map weightedSumOverMap; - std::map weightedSumUnderMap; - - // for the initial belief, add the triangulated initial states - auto triangulation = beliefManager->triangulateBelief(beliefManager->getInitialBelief(), observationResolutionVector[initialObservation]); - uint64_t initialMdpState = nextMdpStateId; - ++nextMdpStateId; - if (triangulation.size() == 1) { - // The initial belief is on the grid itself - auto initBeliefId = triangulation.gridPoints.front(); - if (boundMapsSet) { - auto const& gridPoint = beliefManager->getBelief(initBeliefId); - weightedSumOverMap[initBeliefId] = getWeightedSum(gridPoint, overMap); - weightedSumUnderMap[initBeliefId] = getWeightedSum(gridPoint, underMap); - } - beliefsToBeExpanded.push_back(initBeliefId); - beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState)); - hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? 
storm::utility::one() - : storm::utility::zero()); + storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + if (computeRewards) { + explorer.startNewExploration(storm::utility::zero()); } else { - // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); - for (uint64_t i = 0; i < triangulation.size(); ++i) { - beliefsToBeExpanded.push_back(triangulation.gridPoints[i]); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]); - beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId)); - ++nextMdpStateId; - if (boundMapsSet) { - auto const& gridPoint = beliefManager->getBelief(triangulation.gridPoints[i]); - weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, overMap); - weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, underMap); - } - hintVector.push_back(targetObservations.find(initialObservation) != targetObservations.end() ? storm::utility::one() - : storm::utility::zero()); - } - //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. 
Should do nothing special if belief is on grid, otherwise it gets interesting - ++mdpMatrixRow; + explorer.startNewExploration(storm::utility::one(), storm::utility::zero()); } // Expand the beliefs to generate the grid on-the-fly if (options.explorationThreshold > storm::utility::zero()) { STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } - storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false); - for (auto const& belId : beliefsToBeExpanded) { - foundBeliefs.set(belId, true); - } - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); + while (explorer.hasUnexploredState()) { + uint64_t currId = explorer.exploreNextState(); - uint64_t currMdpState = beliefStateMap.left.at(currId); uint32_t currObservation = beliefManager->getBeliefObservation(currId); - - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); - if (targetObservations.count(currObservation) != 0) { - // Make this state absorbing - targetStates.push_back(currMdpState); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); - ++mdpMatrixRow; - } else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) { - // TODO: with rewards we would have to assign the corresponding reward to this transition - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one() - weightedSumOverMap[currId]); - ++mdpMatrixRow; + explorer.setCurrentStateIsTarget(); + explorer.addSelfloopTransition(); } else { - fullyExpandedStates.grow(nextMdpStateId, false); - fullyExpandedStates.set(currMdpState, true); - uint64_t someState = beliefManager->getBelief(currId).begin()->first; - uint64_t numChoices = pomdp.getNumberOfChoices(someState); - - for (uint64_t 
action = 0; action < numChoices; ++action) { + bool stopExploration = false; + if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + stopExploration = true; + explorer.setCurrentStateIsTruncated(); + } + for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); - // Check for newly found grid points - foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false); for (auto const& successor : successorGridPoints) { - auto successorId = successor.first; - auto const& successorBelief = beliefManager->getBelief(successorId); - auto successorObservation = beliefManager->getBeliefObservation(successorBelief); - if (!foundBeliefs.get(successorId)) { - foundBeliefs.set(successorId); - beliefsToBeExpanded.push_back(successorId); - beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId)); - ++nextMdpStateId; - - if (boundMapsSet) { - ValueType upperBound = getWeightedSum(successorBelief, overMap); - ValueType lowerBound = getWeightedSum(successorBelief, underMap); - if (cc.isEqual(upperBound, lowerBound)) { - hintVector.push_back(lowerBound); - } else { - hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one() : storm::utility::zero()); - } - weightedSumOverMap[successorId] = upperBound; - weightedSumUnderMap[successorId] = lowerBound; - } else { - hintVector.push_back(targetObservations.count(successorObservation) == 1 ? 
storm::utility::one() : storm::utility::zero()); - } + bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first)); } - auto successorMdpState = beliefStateMap.left.at(successorId); - // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted. - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second); } - ++mdpMatrixRow; + if (stopExploration) { + if (computeRewards) { + explorer.addTransitionsToExtraStates(action, truncationProbability); + } else { + explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + } + } + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + explorer.computeRewardAtCurrentState(action, truncationValueBound); + } } } if (storm::utility::resources::isTerminate()) { @@ -525,64 +437,30 @@ namespace storm { break; } } - statistics.overApproximationStates = nextMdpStateId; - STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." 
<< std::endl); + statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildTime.stop(); return nullptr; } - fullyExpandedStates.resize(nextMdpStateId, false); - storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); - mdpLabeling.addLabel("init"); - mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", initialMdpState); - for (auto targetState : targetStates) { - mdpLabeling.addLabelToState("target", targetState); - } - storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); - auto overApproxMdp = std::make_shared>(std::move(modelComponents)); - if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow, storm::utility::zero())); - for (auto const &iter : beliefStateMap.left) { - if (fullyExpandedStates.get(iter.second)) { - auto const& currentBelief = beliefManager->getBelief(iter.first); - auto representativeState = currentBelief.begin()->first; - for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { - uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action)); - } - } - } - overApproxMdp->addRewardModel("default", mdpRewardModel); - } + explorer.finishExploration(); statistics.overApproximationBuildTime.stop(); STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." 
<< std::endl); - overApproxMdp->printModelInformationToStream(std::cout); - - auto modelPtr = std::static_pointer_cast>(overApproxMdp); - auto property = createStandardProperty(min, computeRewards); - auto task = createStandardCheckTask(property, std::move(hintVector)); + explorer.getExploredMdp()->printModelInformationToStream(std::cout); statistics.overApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(overApproxMdp, task)); + explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.overApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return nullptr; - } - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true))); - auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto overApprox = overApproxResultMap[initialMdpState]; STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl); + //auto underApprox = weightedSumUnderMap[initialBelief.id]; auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize); if (storm::utility::resources::isTerminate() && !underApproxComponents) { // TODO: return other components needed for refinement. 
//return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); - return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()}); + //return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()}); } STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); @@ -592,8 +470,8 @@ namespace storm { underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); */ - return std::make_unique>(RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, {}, {}, {}, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()}); + return std::make_unique>(RefinementComponents{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {}, + underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()}); } @@ -930,14 +808,14 @@ namespace storm { std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(std::set const &targetObservations, bool min) { std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true); + // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true); } template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(std::set const &targetObservations, 
bool min) { std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); + // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); } @@ -1191,7 +1069,7 @@ namespace storm { auto representativeState = currentBelief.begin()->first; for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(currentBelief, action)); + mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(iter.first, action)); } } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index a97d36cb2..6216de097 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -117,8 +117,7 @@ namespace storm { */ std::shared_ptr> computeFirstRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, boost::optional> overApproximationMap = boost::none, - boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); + bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200); std::shared_ptr> computeRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, @@ -140,10 +139,8 @@ namespace storm { * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ std::unique_ptr> - computeReachabilityOTF(std::set const &targetObservations, bool min, - std::vector 
&observationResolutionVector, bool computeRewards, - boost::optional> overApproximationMap = boost::none, - boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); + computeReachabilityOTF(std::set const &targetObservations, bool min, bool computeRewards, + std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200); /** * Helper to compute an underapproximation of the reachability property. diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index efc6bee2a..9cb7c039c 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -45,6 +45,8 @@ namespace storm { }; BeliefType const& getBelief(BeliefId const& id) const { + STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existend belief."); + STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range."); return beliefs[id]; } @@ -54,6 +56,10 @@ namespace storm { return idIt->second; } + BeliefId noId() const { + return std::numeric_limits::max(); + } + std::string toString(BeliefType const& belief) const { std::stringstream str; str << "{ "; @@ -180,11 +186,22 @@ namespace storm { return true; } + template + ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) { + ValueType result = storm::utility::zero(); + for (auto const& entry : getBelief(beliefId)) { + result += storm::utility::convertNumber(entry.second) * storm::utility::convertNumber(summands.at(entry.first)); + } + return result; + } + + BeliefId const& getInitialBelief() const { return initialBeliefId; } - ValueType getBeliefActionReward(BeliefType const& belief, uint64_t const& localActionIndex) const { + ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const { + auto const& belief = getBelief(beliefId); STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no 
reward model was specified."); auto result = storm::utility::zero(); auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); @@ -206,6 +223,11 @@ namespace storm { return getBeliefObservation(getBelief(beliefId)); } + uint64_t getBeliefNumberOfChoices(BeliefId beliefId) { + auto belief = getBelief(beliefId); + return pomdp.getNumberOfChoices(belief.begin()->first); + } + Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { //TODO this can also be simplified using the sparse vector interpretation From 8b0e582ef4d56a09c53179f97be83743efeb0fa9 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 31 Mar 2020 14:16:21 +0200 Subject: [PATCH 13/40] Use the new BeliefMdpExplorer also for the underapproximation. --- .../ApproximatePOMDPModelchecker.cpp | 168 ++++++------------ .../ApproximatePOMDPModelchecker.h | 2 +- 2 files changed, 55 insertions(+), 115 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 42f1872f7..9c06c4c06 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -456,7 +456,7 @@ namespace storm { STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl); //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize); + auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize, lowerPomdpValueBounds, upperPomdpValueBounds); if (storm::utility::resources::isTerminate() && !underApproxComponents) { // TODO: return other components needed for refinement. 
//return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); @@ -953,91 +953,62 @@ namespace storm { std::unique_ptr> ApproximatePOMDPModelchecker::computeUnderapproximation(std::shared_ptr>> beliefManager, std::set const &targetObservations, bool min, - bool computeRewards, uint64_t maxModelSize) { + bool computeRewards, uint64_t maxModelSize, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds) { // Build the belief MDP until enough states are explored. //TODO think of other ways to stop exploration besides model size statistics.underApproximationBuildTime.start(); - - // Reserve states 0 and 1 as always sink/goal states - storm::storage::SparseMatrixBuilder mdpTransitionsBuilder(0, 0, 0, true, true); - uint64_t extraBottomState = 0; - uint64_t extraTargetState = computeRewards ? 0 : 1; - uint64_t nextMdpStateId = extraTargetState + 1; - uint64_t mdpMatrixRow = 0; - for (uint64_t state = 0; state < nextMdpStateId; ++state) { - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one()); - ++mdpMatrixRow; + storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + if (computeRewards) { + explorer.startNewExploration(storm::utility::zero()); + } else { + explorer.startNewExploration(storm::utility::one(), storm::utility::zero()); } - std::vector targetStates = {extraTargetState}; - storm::storage::BitVector fullyExpandedStates; - bsmap_type beliefStateMap; - std::deque beliefsToBeExpanded; - - beliefStateMap.insert(bsmap_type::value_type(beliefManager->getInitialBelief(), nextMdpStateId)); - beliefsToBeExpanded.push_back(beliefManager->getInitialBelief()); - ++nextMdpStateId; - - // Expand the believes - storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false); - for (auto 
const& belId : beliefsToBeExpanded) { - foundBeliefs.set(belId, true); + // Expand the beliefs to generate the grid on-the-fly + if (options.explorationThreshold > storm::utility::zero()) { + STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - - uint64_t currMdpState = beliefStateMap.left.at(currId); - auto const& currBelief = beliefManager->getBelief(currId); - uint32_t currObservation = beliefManager->getBeliefObservation(currBelief); - - mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); + while (explorer.hasUnexploredState()) { + uint64_t currId = explorer.exploreNextState(); + uint32_t currObservation = beliefManager->getBeliefObservation(currId); if (targetObservations.count(currObservation) != 0) { - // Make this state absorbing - targetStates.push_back(currMdpState); - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); - ++mdpMatrixRow; - } else if (currMdpState > maxModelSize) { - if (min) { - // Get an upper bound here - if (computeRewards) { - // TODO: With minimizing rewards we need an upper bound! - // In other cases, this could be helpflull as well. - // For now, add a selfloop to "generate" infinite reward - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); - } else { - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, storm::utility::one()); - } - } else { - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, computeRewards ? 
extraTargetState : extraBottomState, storm::utility::one()); - } - ++mdpMatrixRow; + explorer.setCurrentStateIsTarget(); + explorer.addSelfloopTransition(); } else { - fullyExpandedStates.grow(nextMdpStateId, false); - fullyExpandedStates.set(currMdpState, true); - // Iterate over all actions and add the corresponding transitions - uint64_t someState = currBelief.begin()->first; - uint64_t numChoices = pomdp.getNumberOfChoices(someState); - for (uint64_t action = 0; action < numChoices; ++action) { - auto successorBeliefs = beliefManager->expand(currId, action); - // Check for newly found beliefs - foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false); - for (auto const& successor : successorBeliefs) { - auto successorId = successor.first; - if (!foundBeliefs.get(successorId)) { - foundBeliefs.set(successorId); - beliefsToBeExpanded.push_back(successorId); - beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId)); - ++nextMdpStateId; + bool stopExploration = false; + if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + stopExploration = true; + explorer.setCurrentStateIsTruncated(); + } else if (explorer.getCurrentNumberOfMdpStates() >= maxModelSize) { + stopExploration = true; + explorer.setCurrentStateIsTruncated(); + } + for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successors = beliefManager->expand(currId, action); + for (auto const& successor : successors) { + bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. 
Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? explorer.computeUpperValueBoundAtBelief(successor.first) : explorer.computeLowerValueBoundAtBelief(successor.first)); } - auto successorMdpState = beliefStateMap.left.at(successorId); - // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorBeliefs are sorted. - mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second); } - ++mdpMatrixRow; + if (stopExploration) { + if (computeRewards) { + explorer.addTransitionsToExtraStates(action, truncationProbability); + } else { + explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + } + } + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + explorer.computeRewardAtCurrentState(action, truncationValueBound); + } } } if (storm::utility::resources::isTerminate()) { @@ -1045,56 +1016,25 @@ namespace storm { break; } } - statistics.underApproximationStates = nextMdpStateId; + statistics.underApproximationStates = explorer.getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.underApproximationBuildTime.stop(); return nullptr; } - fullyExpandedStates.resize(nextMdpStateId, false); - storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); - mdpLabeling.addLabel("init"); - mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", beliefStateMap.left.at(beliefManager->getInitialBelief())); - for (auto targetState : targetStates) { - mdpLabeling.addLabelToState("target", targetState); - } - storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); - auto model = std::make_shared>(std::move(modelComponents)); - if 
(computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow, storm::utility::zero())); - for (auto const &iter : beliefStateMap.left) { - if (fullyExpandedStates.get(iter.second)) { - auto const& currentBelief = beliefManager->getBelief(iter.first); - auto representativeState = currentBelief.begin()->first; - for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) { - uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); - mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(iter.first, action)); - } - } - } - model->addRewardModel("default", mdpRewardModel); - model->restrictRewardModels(std::set({"default"})); - } - - model->printModelInformationToStream(std::cout); + explorer.finishExploration(); statistics.underApproximationBuildTime.stop(); + STORM_PRINT("Under Approximation MDP build took " << statistics.underApproximationBuildTime << " seconds." << std::endl); + explorer.getExploredMdp()->printModelInformationToStream(std::cout); - auto property = createStandardProperty(min, computeRewards); - auto task = createStandardCheckTask(property, std::vector()); - statistics.underApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); + explorer.computeValuesOfExploredMdp(min ? 
storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.underApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return nullptr; - } - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true))); - auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[beliefStateMap.left.at(beliefManager->getInitialBelief())]; - return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); + STORM_PRINT("Time Underapproximation: " << statistics.underApproximationCheckTime << " seconds." << std::endl); + STORM_PRINT("Under-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl); + + return std::make_unique>(UnderApproxComponents{explorer.getComputedValueAtInitialState(), {}, {}}); } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 6216de097..925bff5b5 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -162,7 +162,7 @@ namespace storm { uint64_t maxModelSize); std::unique_ptr> computeUnderapproximation(std::shared_ptr>> beliefManager, std::set const &targetObservations, bool min, bool computeReward, - uint64_t maxModelSize); + uint64_t maxModelSize, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds); /** * Constructs the initial belief for the given POMDP From 94d08d73fb740bf38c747d74f49eeaa1310338c0 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 31 Mar 2020 14:42:38 +0200 Subject: [PATCH 14/40] Capitalized GUROBI in FindGUROBI.cmake file because it was not found on linux. 
--- .../cmake/find_modules/{FindGurobi.cmake => FindGUROBI.cmake} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename resources/cmake/find_modules/{FindGurobi.cmake => FindGUROBI.cmake} (100%) diff --git a/resources/cmake/find_modules/FindGurobi.cmake b/resources/cmake/find_modules/FindGUROBI.cmake similarity index 100% rename from resources/cmake/find_modules/FindGurobi.cmake rename to resources/cmake/find_modules/FindGUROBI.cmake From a80553a700df59d89709427d16ebfffe72342b0c Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 31 Mar 2020 14:43:35 +0200 Subject: [PATCH 15/40] Removed a duplicated method in StandardRewardModel (setStateActionRewardValue did the same as setStateActionReward) --- src/storm/models/sparse/StandardRewardModel.cpp | 5 ----- src/storm/models/sparse/StandardRewardModel.h | 9 --------- 2 files changed, 14 deletions(-) diff --git a/src/storm/models/sparse/StandardRewardModel.cpp b/src/storm/models/sparse/StandardRewardModel.cpp index 159c39446..b017ae58a 100644 --- a/src/storm/models/sparse/StandardRewardModel.cpp +++ b/src/storm/models/sparse/StandardRewardModel.cpp @@ -349,11 +349,6 @@ namespace storm { return result; } - template - void StandardRewardModel::setStateActionRewardValue(uint_fast64_t row, ValueType const& value) { - this->optionalStateActionRewardVector.get()[row] = value; - } - template template void StandardRewardModel::clearRewardAtState(uint_fast64_t state, storm::storage::SparseMatrix const& transitions) { diff --git a/src/storm/models/sparse/StandardRewardModel.h b/src/storm/models/sparse/StandardRewardModel.h index 2bfed8143..aedd7c535 100644 --- a/src/storm/models/sparse/StandardRewardModel.h +++ b/src/storm/models/sparse/StandardRewardModel.h @@ -287,15 +287,6 @@ namespace storm { template storm::storage::BitVector getChoicesWithFilter(storm::storage::SparseMatrix const& transitionMatrix, std::function const& filter) const; - /*! - * Sets the given value in the state-action reward vector at the given row. 
This assumes that the reward - * model has state-action rewards. - * - * @param row The row at which to set the given value. - * @param value The value to set. - */ - void setStateActionRewardValue(uint_fast64_t row, ValueType const& value); - /*! * Retrieves whether the reward model is empty, i.e. contains no state-, state-action- or transition-based * rewards. From 37fa53c4d827ff78b27cc49b5d712e02361961e0 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 1 Apr 2020 12:39:02 +0200 Subject: [PATCH 16/40] Added a command-line-switch to disable making a pomdp canonic (for prism compatibility) --- .../settings/modules/POMDPSettings.cpp | 6 +++++ .../settings/modules/POMDPSettings.h | 1 + src/storm-pomdp-cli/storm-pomdp.cpp | 26 +++++++++---------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp index 4ba3f8148..57c065f7a 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.cpp @@ -13,6 +13,7 @@ namespace storm { namespace modules { const std::string POMDPSettings::moduleName = "pomdp"; + const std::string noCanonicOption = "nocanonic"; const std::string exportAsParametricModelOption = "parametric-drn"; const std::string gridApproximationOption = "gridapproximation"; const std::string qualitativeReductionOption = "qualitativereduction"; @@ -31,6 +32,7 @@ namespace storm { const std::string checkFullyObservableOption = "check-fully-observable"; POMDPSettings::POMDPSettings() : ModuleSettings(moduleName) { + this->addOption(storm::settings::OptionBuilder(moduleName, noCanonicOption, false, "If this is set, actions will not be ordered canonically. 
Could yield incorrect results.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, exportAsParametricModelOption, false, "Export the parametric file.").addArgument(storm::settings::ArgumentBuilder::createStringArgument("filename", "The name of the file to which to write the model.").build()).build()); this->addOption(storm::settings::OptionBuilder(moduleName, qualitativeReductionOption, false, "Reduces the model size by performing qualitative analysis (E.g. merge states with prob. 1.").build()); this->addOption(storm::settings::OptionBuilder(moduleName, analyzeUniqueObservationsOption, false, "Computes the states with a unique observation").build()); @@ -47,6 +49,10 @@ namespace storm { } + bool POMDPSettings::isNoCanonicSet() const { + return this->getOption(noCanonicOption).getHasOptionBeenSet(); + } + bool POMDPSettings::isExportToParametricSet() const { return this->getOption(exportAsParametricModelOption).getHasOptionBeenSet(); } diff --git a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h index 768766536..6754ac55c 100644 --- a/src/storm-pomdp-cli/settings/modules/POMDPSettings.h +++ b/src/storm-pomdp-cli/settings/modules/POMDPSettings.h @@ -26,6 +26,7 @@ namespace storm { bool isQualitativeReductionSet() const; + bool isNoCanonicSet() const; bool isGridApproximationSet() const; bool isAnalyzeUniqueObservationsSet() const; bool isMecReductionSet() const; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 58637c6e7..9b3026832 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -102,7 +102,7 @@ namespace storm { if (pomdpSettings.isGridApproximationSet()) { STORM_PRINT_AND_LOG("Applying grid approximation... 
"); auto const& gridSettings = storm::settings::getModule(); - typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker::Options options; + typename storm::pomdp::modelchecker::ApproximatePOMDPModelchecker>::Options options; options.initialGridResolution = gridSettings.getGridResolution(); options.explorationThreshold = storm::utility::convertNumber(gridSettings.getExplorationThreshold()); options.doRefinement = gridSettings.isRefineSet(); @@ -117,20 +117,16 @@ namespace storm { STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "A non-zero numeric precision was set although exact arithmethic is used. Results might be inexact."); } } - storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(*pomdp, options); - std::unique_ptr> result = checker.check(formula); + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker> checker(*pomdp, options); + auto result = checker.check(formula); checker.printStatisticsToStream(std::cout); - if (result) { - if (storm::utility::resources::isTerminate()) { - STORM_PRINT_AND_LOG("\nResult till abort: ") - } else { - STORM_PRINT_AND_LOG("\nResult: ") - } - printResult(result->underApproxValue, result->overApproxValue); - STORM_PRINT_AND_LOG(std::endl); + if (storm::utility::resources::isTerminate()) { + STORM_PRINT_AND_LOG("\nResult till abort: ") } else { - STORM_PRINT_AND_LOG("\nResult: Not available." 
<< std::endl); + STORM_PRINT_AND_LOG("\nResult: ") } + printResult(result.lowerBound, result.upperBound); + STORM_PRINT_AND_LOG(std::endl); analysisPerformed = true; } if (pomdpSettings.isMemlessSearchSet()) { @@ -263,8 +259,10 @@ namespace storm { STORM_LOG_THROW(model->getType() == storm::models::ModelType::Pomdp && model->isSparseModel(), storm::exceptions::WrongFormatException, "Expected a POMDP in sparse representation."); std::shared_ptr> pomdp = model->template as>(); - storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); - pomdp = makeCanonic.transform(); + if (!pomdpSettings.isNoCanonicSet()) { + storm::transformer::MakePOMDPCanonic makeCanonic(*pomdp); + pomdp = makeCanonic.transform(); + } std::shared_ptr formula; if (!symbolicInput.properties.empty()) { From 71e065449890913cfdf56c81400fa9d6387228ee Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 1 Apr 2020 12:39:56 +0200 Subject: [PATCH 17/40] Changed method signatures to new data structures. --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 10 +- .../ApproximatePOMDPModelchecker.cpp | 765 ++++-------------- .../ApproximatePOMDPModelchecker.h | 107 +-- 3 files changed, 200 insertions(+), 682 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 107f699ae..33e1d1f51 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -6,6 +6,7 @@ #include #include +#include "storm-parsers/api/properties.h" #include "storm/api/properties.h" #include "storm/api/verification.h" @@ -13,20 +14,25 @@ #include "storm/utility/macros.h" #include "storm-pomdp/storage/BeliefManager.h" #include "storm/utility/SignalHandler.h" +#include "storm/modelchecker/results/CheckResult.h" +#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" +#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" +#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" namespace 
storm { namespace builder { - template + template class BeliefMdpExplorer { public: typedef typename PomdpType::ValueType ValueType; - typedef storm::storage::BeliefManager BeliefManagerType; + typedef storm::storage::BeliefManager BeliefManagerType; typedef typename BeliefManagerType::BeliefId BeliefId; typedef uint64_t MdpStateType; BeliefMdpExplorer(std::shared_ptr beliefManager, std::vector const& pomdpLowerValueBounds, std::vector const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds) { // Intentionally left empty } + BeliefMdpExplorer(BeliefMdpExplorer&& other) = default; void startNewExploration(boost::optional extraTargetStateValue = boost::none, boost::optional extraBottomStateValue = boost::none) { // Reset data from potential previous explorations diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 9c06c4c06..436fc3e09 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -15,13 +15,8 @@ #include "storm/models/sparse/StandardRewardModel.h" #include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h" #include "storm/utility/vector.h" -#include "storm/modelchecker/results/CheckResult.h" -#include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" -#include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" -#include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" #include "storm/api/properties.h" #include "storm/api/export.h" -#include "storm-parsers/api/storm-parsers.h" #include "storm-pomdp/builder/BeliefMdpExplorer.h" #include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h" @@ -32,8 +27,8 @@ namespace storm { namespace pomdp { namespace modelchecker { - template - ApproximatePOMDPModelchecker::Options::Options() { + 
template + ApproximatePOMDPModelchecker::Options::Options() { initialGridResolution = 10; explorationThreshold = storm::utility::zero(); doRefinement = true; @@ -41,61 +36,78 @@ namespace storm { numericPrecision = storm::NumberTraits::IsExact ? storm::utility::zero() : storm::utility::convertNumber(1e-9); cacheSubsimplices = false; } - template - ApproximatePOMDPModelchecker::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { + + template + ApproximatePOMDPModelchecker::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) { + // Intentionally left empty + } + + template + typename ApproximatePOMDPModelchecker::ValueType + ApproximatePOMDPModelchecker::Result::diff(bool relative) const { + ValueType diff = upperBound - lowerBound; + if (diff < storm::utility::zero()) { + STORM_LOG_WARN_COND(diff >= 1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << "."); + diff = storm::utility::zero(); + } + if (relative && !storm::utility::isZero(upperBound)) { + diff /= upperBound; + } + return diff; + } + + template + ApproximatePOMDPModelchecker::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { // intentionally left empty; } - template - ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options) : pomdp(pomdp), options(options) { + template + ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) { cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(this->options.numericPrecision), false); } - template - std::unique_ptr> ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { + template + typename ApproximatePOMDPModelchecker::Result 
ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { // Reset all collected statistics statistics = Statistics(); - std::unique_ptr> result; // Extract the relevant information from the formula auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); // Compute some initial bounds on the values for each state of the pomdp auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker>(pomdp).getValueBounds(formula, formulaInfo); + Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]); - if (formulaInfo.isNonNestedReachabilityProbability()) { - // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. - STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); - if (!formulaInfo.getSinkStates().empty()) { - auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states); - reachableFromSinkStates &= ~formulaInfo.getSinkStates().states; - STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. 
This is currently not supported"); - } - if (options.doRefinement) { - result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false); - } else { - result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper); - } - } else if (formulaInfo.isNonNestedExpectedRewardFormula()) { + boost::optional rewardModelName; + if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported"); - if (options.doRefinement) { - result = refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true); + if (formulaInfo.isNonNestedReachabilityProbability()) { + if (!formulaInfo.getSinkStates().empty()) { + auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states); + reachableFromSinkStates &= ~formulaInfo.getSinkStates().states; + STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported"); + } } else { - // FIXME: pick the non-unique reward model here - STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet."); - result = computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper); + // Expected reward formula! 
+ rewardModelName = formulaInfo.getRewardModelName(); } } else { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); } + + if (options.doRefinement) { + refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); + } else { + computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); + } if (storm::utility::resources::isTerminate()) { statistics.aborted = true; } return result; } - template - void ApproximatePOMDPModelchecker::printStatisticsToStream(std::ostream& stream) const { + template + void ApproximatePOMDPModelchecker::printStatisticsToStream(std::ostream& stream) const { stream << "##### Grid Approximation Statistics ######" << std::endl; stream << "# Input model: " << std::endl; pomdp.printModelInformationToStream(stream); @@ -143,114 +155,82 @@ namespace storm { stream << "##########################################" << std::endl; } - std::shared_ptr createStandardProperty(bool min, bool computeRewards) { - std::string propertyString = computeRewards ? "R" : "P"; - propertyString += min ? "min" : "max"; - propertyString += "=? [F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - return storm::api::extractFormulasFromProperties(propertyVector).front(); - } - - template - storm::modelchecker::CheckTask createStandardCheckTask(std::shared_ptr& property, std::vector&& hintVector) { - //Note: The property should not run out of scope after calling this because the task only stores the property by reference. 
- // Therefore, this method needs the property by reference (and not const reference) - auto task = storm::api::createTask(property, false); - if (!hintVector.empty()) { - auto hint = storm::modelchecker::ExplicitModelCheckerHint(); - hint.setResultHint(std::move(hintVector)); - auto hintPtr = std::make_shared>(hint); - task.setHint(hintPtr); - } - return task; - } + - template - std::unique_ptr> - ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, bool computeRewards) { - std::srand(time(NULL)); - // Compute easy upper and lower bounds - storm::utility::Stopwatch underlyingWatch(true); - // Compute the results on the underlying MDP as a basic overapproximation - storm::models::sparse::StateLabeling underlyingMdpLabeling(pomdp.getStateLabeling()); - // TODO: Is the following really necessary - underlyingMdpLabeling.addLabel("__goal__"); - std::vector goalStates; - for (auto const &targetObs : targetObservations) { - for (auto const &goalState : pomdp.getStatesWithObservation(targetObs)) { - underlyingMdpLabeling.addLabelToState("__goal__", goalState); - } - } - storm::models::sparse::Mdp underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels()); - auto underlyingModel = std::static_pointer_cast>( - std::make_shared>(underlyingMdp)); - std::string initPropString = computeRewards ? "R" : "P"; - initPropString += min ? "min" : "max"; - initPropString += "=? 
[F \"__goal__\"]"; - std::vector propVector = storm::api::parseProperties(initPropString); - std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); - STORM_PRINT("Underlying MDP" << std::endl) - if (computeRewards) { - underlyingMdp.addRewardModel("std", pomdp.getUniqueRewardModel()); + template + void ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { + + if (options.explorationThreshold > storm::utility::zero()) { + STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } - underlyingMdp.printModelInformationToStream(std::cout); - std::unique_ptr underlyingRes( - storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); - STORM_LOG_ASSERT(underlyingRes, "Result not exist."); - underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); - auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); - underlyingWatch.stop(); - - storm::utility::Stopwatch positionalWatch(true); - // we define some positional scheduler for the POMDP as a basic lower bound - storm::storage::Scheduler pomdpScheduler(pomdp.getNumberOfStates()); - for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) { - auto obsStates = pomdp.getStatesWithObservation(obs); - // select a random action for all states with the same observation - uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front()); - for (auto const &state : obsStates) { - pomdpScheduler.setChoice(chosenAction, state); + + uint64_t underApproxSizeThreshold = 0; + { // Overapproximation + std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); + 
auto manager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + manager->setRewardModel(rewardModelName); + } + auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager); + if (approx) { + STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n"); + approx->getExploredMdp()->printModelInformationToStream(std::cout); + ValueType& resultValue = min ? result.lowerBound : result.upperBound; + resultValue = approx->getComputedValueAtInitialState(); + underApproxSizeThreshold = approx->getExploredMdp()->getNumberOfStates(); + } + } + { // Underapproximation (Uses a fresh Belief manager) + auto manager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + manager->setRewardModel(rewardModelName); + } + auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager); + if (approx) { + STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); + approx->getExploredMdp()->printModelInformationToStream(std::cout); + ValueType& resultValue = min ? 
result.upperBound : result.lowerBound; + resultValue = approx->getComputedValueAtInitialState(); } } - auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false); - if (computeRewards) { - underApproxModel->restrictRewardModels({"std"}); - } - STORM_PRINT("Random Positional Scheduler" << std::endl) - underApproxModel->printModelInformationToStream(std::cout); - std::unique_ptr underapproxRes( - storm::api::verifyWithSparseEngine(underApproxModel, storm::api::createTask(underlyingProperty, false))); - STORM_LOG_ASSERT(underapproxRes, "Result not exist."); - underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true))); - auto initialUnderApproxMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); - positionalWatch.stop(); - - STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // " - << initialUnderApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl) - STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl) - - // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution. - // This can probably be improved (i.e. 
resolutions for single belief states) - STORM_PRINT("Initial Resolution: " << options.initialGridResolution << std::endl) + } + + template + void ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { + + // Set up exploration data std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - std::set changedObservations; - uint64_t underApproxModelSize = 200; - uint64_t refinementCounter = 1; - STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl) - std::shared_ptr> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, - {}, - {}, underApproxModelSize); - if (res == nullptr) { - statistics.refinementSteps = 0; - return nullptr; + auto beliefManager = std::make_shared(pomdp, options.numericPrecision); + if (rewardModelName) { + beliefManager->setRewardModel(rewardModelName); + } + + // OverApproximaion + auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager); + if (!overApproximation) { + return; } - ValueType lastMinScore = storm::utility::infinity(); - while (refinementCounter < 1000 && ((!min && res->overApproxValue - res->underApproxValue > options.refinementPrecision) || - (min && res->underApproxValue - res->overApproxValue > options.refinementPrecision))) { + ValueType& overApproxValue = min ? result.lowerBound : result.upperBound; + overApproxValue = overApproximation->getComputedValueAtInitialState(); + + // UnderApproximation TODO: use same belief manager?) 
+ uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates(); + auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager); + if (!underApproximation) { + return; + } + ValueType& underApproxValue = min ? result.upperBound : result.lowerBound; + underApproxValue = underApproximation->getComputedValueAtInitialState(); + + // ValueType lastMinScore = storm::utility::infinity(); + // Start refinement + statistics.refinementSteps = 0; + while (result.diff() > options.refinementPrecision) { if (storm::utility::resources::isTerminate()) { break; } // TODO the actual refinement + /* // choose which observation(s) to refine std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); std::vector beliefCount(pomdp.getNrObservations(), 0); @@ -286,9 +266,9 @@ namespace storm { } - /*for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { - obsAccumulator[i] /= storm::utility::convertNumber(beliefCount[i]); - }*/ + //for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { + // obsAccumulator[i] /= storm::utility::convertNumber(beliefCount[i]); + //} changedObservations.clear(); //TODO think about some other scoring methods @@ -301,21 +281,21 @@ namespace storm { observationResolutionVector[i] = maxRes + 1; changedObservations.insert(i); } - /*} else { - lastMinScore = std::min(maxAvgDifference, lastMinScore); - STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) - STORM_PRINT("Last Min Score: " << lastMinScore << std::endl) - //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl) - for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { + //} else { + // lastMinScore = std::min(maxAvgDifference, lastMinScore); + // STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) + // STORM_PRINT("Last Min Score: " << lastMinScore 
<< std::endl) + // //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl) + // for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { //STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i]) - if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) { + // if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) { //STORM_PRINT(" *** ") - observationResolutionVector[i] += 1; - changedObservations.insert(i); - } + // observationResolutionVector[i] += 1; + // changedObservations.insert(i); + // } //STORM_PRINT(std::endl) - } - }*/ + // } + //} if (underApproxModelSize < std::numeric_limits::max() - 101) { underApproxModelSize += 100; } @@ -327,60 +307,13 @@ namespace storm { STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) || cc.isEqual(res->underApproxValue, res->overApproxValue), "The value for the under-approximation is larger than the value for the over-approximation."); - ++refinementCounter; - } - statistics.refinementSteps = refinementCounter; - if (min) { - return std::make_unique>(POMDPCheckResult{res->underApproxValue, res->overApproxValue}); - } else { - return std::make_unique>(POMDPCheckResult{res->overApproxValue, res->underApproxValue}); - } - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, - bool computeRewards, - std::vector const& lowerPomdpValueBounds, - std::vector const& upperPomdpValueBounds, - uint64_t maxUaModelSize) { - STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - auto result = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, lowerPomdpValueBounds, - upperPomdpValueBounds, maxUaModelSize); - if (result == nullptr) { - return 
nullptr; - } - if (min) { - return std::make_unique>(POMDPCheckResult{result->underApproxValue, result->overApproxValue}); - } else { - return std::make_unique>(POMDPCheckResult{result->overApproxValue, result->underApproxValue}); + */ + ++statistics.refinementSteps.get(); } } - template - ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) { - ValueType result = storm::utility::zero(); - for (auto const& entry : belief) { - result += storm::utility::convertNumber(entry.second) * storm::utility::convertNumber(summands.at(entry.first)); - } - return result; - } - - template - std::shared_ptr> - ApproximatePOMDPModelchecker::computeFirstRefinementStep(std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, - bool computeRewards, - std::vector const& lowerPomdpValueBounds, - std::vector const& upperPomdpValueBounds, - uint64_t maxUaModelSize) { - - auto beliefManager = std::make_shared>>(pomdp, options.numericPrecision); - if (computeRewards) { - beliefManager->setRewardModel(); // TODO: get actual name - } - + template + std::shared_ptr::ExplorerType> ApproximatePOMDPModelchecker::computeOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, std::vector& observationResolutionVector, std::shared_ptr& beliefManager) { statistics.overApproximationBuildTime.start(); storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); if (computeRewards) { @@ -390,9 +323,6 @@ namespace storm { } // Expand the beliefs to generate the grid on-the-fly - if (options.explorationThreshold > storm::utility::zero()) { - STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) - } while (explorer.hasUnexploredState()) { uint64_t currId = explorer.exploreNextState(); @@ -445,39 +375,20 @@ namespace storm { explorer.finishExploration(); 
statistics.overApproximationBuildTime.stop(); - STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); - explorer.getExploredMdp()->printModelInformationToStream(std::cout); statistics.overApproximationCheckTime.start(); explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.overApproximationCheckTime.stop(); - - STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl); - STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl); - - //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize, lowerPomdpValueBounds, upperPomdpValueBounds); - if (storm::utility::resources::isTerminate() && !underApproxComponents) { - // TODO: return other components needed for refinement. - //return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); - //return std::make_unique>(RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, {}, {}, {}, beliefStateMap, {}, beliefManager->getInitialBelief()}); - } - STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); - /* TODO: return other components needed for refinement. 
- return std::make_unique>( - RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, - underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); - */ - return std::make_unique>(RefinementComponents{explorer.getExploredMdp(), explorer.getComputedValueAtInitialState(), underApproxComponents->underApproxValue, {}, - underApproxComponents->underApproxMap, {}, {}, {}, {}, underApproxComponents->underApproxBeliefStateMap, beliefManager->getInitialBelief()}); - + return std::make_shared(std::move(explorer)); } - template + template + void ApproximatePOMDPModelchecker::refineOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + /*TODO: + template std::shared_ptr> - ApproximatePOMDPModelchecker::computeRefinementStep(std::set const &targetObservations, bool min, + ApproximatePOMDPModelchecker::computeRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, bool computeRewards, std::shared_ptr> refinementComponents, @@ -504,7 +415,7 @@ namespace storm { uint64_t nextBeliefId = refinementComponents->beliefList.size(); uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates(); - std::set relevantStates; + std::set relevantStates; // The MDP states where the observation has changed for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { auto currentBelief = refinementComponents->beliefList[iter.first]; if (changedObservations.find(currentBelief.observation) != changedObservations.end()) { @@ -512,7 +423,7 @@ namespace storm { } } - std::set> statesAndActionsToCheck; + std::set> statesAndActionsToCheck; // The predecessors of states where the observation has changed for (uint64_t state 
= 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) { for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) { for (typename storm::storage::SparseMatrix::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow( @@ -536,6 +447,7 @@ namespace storm { action); std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { + // Expand and triangulate the successor uint32_t observation = iter->first; uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); @@ -803,160 +715,12 @@ namespace storm { refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId}); } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(std::set const &targetObservations, bool min) { - std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, true); - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(std::set const &targetObservations, bool min) { - std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - // return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); + */ } - - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeUnderapproximation(std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - uint64_t initialBeliefId, bool min, 
- bool computeRewards, uint64_t maxModelSize) { - std::set visitedBelieves; - std::deque beliefsToBeExpanded; - bsmap_type beliefStateMap; - std::vector>> transitions = {{{{0, storm::utility::one()}}}, - {{{1, storm::utility::one()}}}}; - std::vector targetStates = {1}; - - uint64_t stateId = 2; - beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, stateId)); - ++stateId; - uint64_t nextId = beliefList.size(); - uint64_t counter = 0; - statistics.underApproximationBuildTime.start(); - // Expand the believes - visitedBelieves.insert(initialBeliefId); - beliefsToBeExpanded.push_back(initialBeliefId); - while (!beliefsToBeExpanded.empty()) { - //TODO think of other ways to stop exploration besides model size - auto currentBeliefId = beliefsToBeExpanded.front(); - uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); - // for targets, we only consider one action with one transition - if (beliefIsTarget[currentBeliefId]) { - // add a self-loop to target states - targetStates.push_back(beliefStateMap.left.at(currentBeliefId)); - transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one()}}}); - } else if (counter > maxModelSize) { - transitions.push_back({{{0, storm::utility::one()}}}); - } else { - // Iterate over all actions and add the corresponding transitions - std::vector> actionTransitionStorage; - //TODO add a way to extract the actions from the over-approx and use them here? 
- for (uint64_t action = 0; action < numChoices; ++action) { - std::map transitionsInStateWithAction; - std::map observationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currentBeliefId], action); - for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - uint64_t nextBeliefId = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId], - action, - observation, nextId); - nextId = beliefList.size(); - if (visitedBelieves.insert(nextBeliefId).second) { - beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId)); - ++stateId; - beliefsToBeExpanded.push_back(nextBeliefId); - ++counter; - } - transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second; - } - actionTransitionStorage.push_back(transitionsInStateWithAction); - } - transitions.push_back(actionTransitionStorage); - } - beliefsToBeExpanded.pop_front(); - if (storm::utility::resources::isTerminate()) { - statistics.underApproximationBuildAborted = true; - break; - } - } - statistics.underApproximationStates = transitions.size(); - if (storm::utility::resources::isTerminate()) { - statistics.underApproximationBuildTime.stop(); - return nullptr; - } + template + std::shared_ptr::ExplorerType> ApproximatePOMDPModelchecker::computeUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr& beliefManager) { - storm::models::sparse::StateLabeling labeling(transitions.size()); - labeling.addLabel("init"); - labeling.addLabel("target"); - labeling.addLabelToState("init", 0); - for (auto targetState : targetStates) { - labeling.addLabelToState("target", targetState); - } - - std::shared_ptr> model; - auto transitionMatrix = buildTransitionMatrix(transitions); - if 
(transitionMatrix.getRowCount() == transitionMatrix.getRowGroupCount()) { - transitionMatrix.makeRowGroupingTrivial(); - } - storm::storage::sparse::ModelComponents modelComponents(transitionMatrix, labeling); - storm::models::sparse::Mdp underApproxMdp(modelComponents); - if (computeRewards) { - storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap.left) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); - } - } - underApproxMdp.addRewardModel("std", rewardModel); - underApproxMdp.restrictRewardModels(std::set({"std"})); - } - model = std::make_shared>(underApproxMdp); - - model->printModelInformationToStream(std::cout); - statistics.underApproximationBuildTime.stop(); - - std::string propertyString; - if (computeRewards) { - propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; - } else { - propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? 
[F \"target\"]"; - } - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - - statistics.underApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, false))); - statistics.underApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return nullptr; - } - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true))); - auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; - - return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeUnderapproximation(std::shared_ptr>> beliefManager, - std::set const &targetObservations, bool min, - bool computeRewards, uint64_t maxModelSize, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds) { - // Build the belief MDP until enough states are explored. 
- //TODO think of other ways to stop exploration besides model size - statistics.underApproximationBuildTime.start(); storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); if (computeRewards) { @@ -981,7 +745,7 @@ namespace storm { if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { stopExploration = true; explorer.setCurrentStateIsTruncated(); - } else if (explorer.getCurrentNumberOfMdpStates() >= maxModelSize) { + } else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) { stopExploration = true; explorer.setCurrentStateIsTruncated(); } @@ -1024,249 +788,22 @@ namespace storm { explorer.finishExploration(); statistics.underApproximationBuildTime.stop(); - STORM_PRINT("Under Approximation MDP build took " << statistics.underApproximationBuildTime << " seconds." << std::endl); - explorer.getExploredMdp()->printModelInformationToStream(std::cout); statistics.underApproximationCheckTime.start(); explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.underApproximationCheckTime.stop(); - STORM_PRINT("Time Underapproximation: " << statistics.underApproximationCheckTime << " seconds." 
<< std::endl); - STORM_PRINT("Under-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl); - - return std::make_unique>(UnderApproxComponents{explorer.getComputedValueAtInitialState(), {}, {}}); - } - - - template - storm::storage::SparseMatrix - ApproximatePOMDPModelchecker::buildTransitionMatrix(std::vector>> &transitions) { - uint_fast64_t currentRow = 0; - uint_fast64_t currentRowGroup = 0; - uint64_t nrColumns = transitions.size(); - uint64_t nrRows = 0; - uint64_t nrEntries = 0; - for (auto const &actionTransitions : transitions) { - for (auto const &map : actionTransitions) { - nrEntries += map.size(); - ++nrRows; - } - } - storm::storage::SparseMatrixBuilder smb(nrRows, nrColumns, nrEntries, true, true); - for (auto const &actionTransitions : transitions) { - smb.newRowGroup(currentRow); - for (auto const &map : actionTransitions) { - for (auto const &transition : map) { - smb.addNextValue(currentRow, transition.first, transition.second); - } - ++currentRow; - } - ++currentRowGroup; - } - return smb.build(); - } - - template - uint64_t ApproximatePOMDPModelchecker::getBeliefIdInVector( - std::vector> const &grid, uint32_t observation, - std::map &probabilities) { - // TODO This one is quite slow - for (auto const &belief : grid) { - if (belief.observation == observation) { - bool same = true; - for (auto const &probEntry : belief.probabilities) { - if (probabilities.find(probEntry.first) == probabilities.end()) { - same = false; - break; - } - if (!cc.isEqual(probEntry.second, probabilities[probEntry.first])) { - same = false; - break; - } - } - if (same) { - return belief.id; - } - } - } - return -1; - } - - template - storm::pomdp::Belief ApproximatePOMDPModelchecker::getInitialBelief(uint64_t id) { - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, - "POMDP contains more than one initial state"); - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, - "POMDP does not contain an initial 
state"); - std::map distribution; - uint32_t observation = 0; - for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { - if (pomdp.getInitialStates()[state] == 1) { - distribution[state] = storm::utility::one(); - observation = pomdp.getObservation(state); - break; - } - } - return storm::pomdp::Belief{id, observation, distribution}; - } - - template - std::pair>, std::vector> - ApproximatePOMDPModelchecker::computeSubSimplexAndLambdas( - std::map &probabilities, uint64_t resolution, uint64_t nrStates) { - - //TODO this can also be simplified using the sparse vector interpretation - - // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) - // Variable names are based on the paper - std::vector x(nrStates); - std::vector v(nrStates); - std::vector d(nrStates); - auto convResolution = storm::utility::convertNumber(resolution); - - for (size_t i = 0; i < nrStates; ++i) { - for (auto const &probEntry : probabilities) { - if (probEntry.first >= i) { - x[i] += convResolution * probEntry.second; - } - } - v[i] = storm::utility::floor(x[i]); - d[i] = x[i] - v[i]; - } - - auto p = storm::utility::vector::getSortedIndices(d); - - std::vector> qs(nrStates, std::vector(nrStates)); - for (size_t i = 0; i < nrStates; ++i) { - if (i == 0) { - for (size_t j = 0; j < nrStates; ++j) { - qs[i][j] = v[j]; - } - } else { - for (size_t j = 0; j < nrStates; ++j) { - if (j == p[i - 1]) { - qs[i][j] = qs[i - 1][j] + storm::utility::one(); - } else { - qs[i][j] = qs[i - 1][j]; - } - } - } - } - std::vector> subSimplex(nrStates); - for (size_t j = 0; j < nrStates; ++j) { - for (size_t i = 0; i < nrStates - 1; ++i) { - if (cc.isLess(storm::utility::zero(), qs[j][i] - qs[j][i + 1])) { - subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution; - } - } - - if (cc.isLess(storm::utility::zero(), qs[j][nrStates - 1])) { - subSimplex[j][nrStates - 1] = qs[j][nrStates - 1] / convResolution; - } - } - - std::vector lambdas(nrStates, 
storm::utility::zero()); - auto sum = storm::utility::zero(); - for (size_t i = 1; i < nrStates; ++i) { - lambdas[i] = d[p[i - 1]] - d[p[i]]; - sum += d[p[i - 1]] - d[p[i]]; - } - lambdas[0] = storm::utility::one() - sum; - - return std::make_pair(subSimplex, lambdas); - } - - - template - std::map - ApproximatePOMDPModelchecker::computeObservationProbabilitiesAfterAction( - storm::pomdp::Belief &belief, - uint64_t actionIndex) { - std::map res; - // the id is not important here as we immediately discard the belief (very hacky, I don't like it either) - std::map postProbabilities; - for (auto const &probEntry : belief.probabilities) { - uint64_t state = probEntry.first; - auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); - for (auto const &entry : row) { - if (entry.getValue() > 0) { - postProbabilities[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); - } - } - } - for (auto const &probEntry : postProbabilities) { - uint32_t observation = pomdp.getObservation(probEntry.first); - if (res.count(observation) == 0) { - res[observation] = probEntry.second; - } else { - res[observation] += probEntry.second; - } - } - - return res; + return std::make_shared(std::move(explorer)); } - template - uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation(std::vector> &beliefList, - std::vector &beliefIsTarget, std::set const &targetObservations, storm::pomdp::Belief &belief, uint64_t actionIndex, - uint32_t observation, uint64_t id) { - std::map distributionAfter; - for (auto const &probEntry : belief.probabilities) { - uint64_t state = probEntry.first; - auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); - for (auto const &entry : row) { - if (pomdp.getObservation(entry.getColumn()) == observation) { - distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); - } - } - 
} - // We have to normalize the distribution - auto sum = storm::utility::zero(); - for (auto const &entry : distributionAfter) { - sum += entry.second; - } - - for (auto const &entry : distributionAfter) { - distributionAfter[entry.first] /= sum; - } - if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) { - auto res = getBeliefIdInVector(beliefList, observation, distributionAfter); - return res; - } else { - beliefList.push_back(storm::pomdp::Belief{id, observation, distributionAfter}); - beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - return id; - } + template + void ApproximatePOMDPModelchecker::refineUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { + // TODO } - template - ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, std::map const& belief) { - auto result = storm::utility::zero(); - for (auto const &probEntry : belief) { - result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); - } - return result; - } - - template - ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, storm::pomdp::Belief const& belief) { - auto result = storm::utility::zero(); - for (auto const &probEntry : belief.probabilities) { - result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); - } - return result; - } - - - template - class ApproximatePOMDPModelchecker; - -#ifdef STORM_HAVE_CARL - - template - class ApproximatePOMDPModelchecker; + template class ApproximatePOMDPModelchecker>; + template class ApproximatePOMDPModelchecker>; -#endif } } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 925bff5b5..0d59ac31a 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -4,6 +4,7 @@ #include "storm/utility/logging.h" #include "storm-pomdp/storage/Belief.h" #include "storm-pomdp/storage/BeliefManager.h" +#include "storm-pomdp/builder/BeliefMdpExplorer.h" #include #include "storm/storage/jani/Property.h" @@ -17,12 +18,6 @@ namespace storm { namespace modelchecker { typedef boost::bimap bsmap_type; - template - struct POMDPCheckResult { - ValueType overApproxValue; - ValueType underApproxValue; - }; - /** * Struct containing information which is supposed to be persistent over multiple refinement steps * @@ -49,9 +44,13 @@ namespace storm { bsmap_type underApproxBeliefStateMap; }; - template> + template class ApproximatePOMDPModelchecker { public: + typedef typename PomdpModelType::ValueType ValueType; + typedef typename PomdpModelType::RewardModelType RewardModelType; + typedef storm::storage::BeliefManager BeliefManagerType; + typedef storm::builder::BeliefMdpExplorer ExplorerType; struct Options { Options(); @@ -63,85 +62,60 @@ namespace storm { bool cacheSubsimplices; /// Enables caching of subsimplices }; - ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options = Options()); + struct Result { + Result(ValueType lower, ValueType upper); + ValueType lowerBound; + ValueType upperBound; + ValueType diff (bool relative = false) const; + }; + + ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options = Options()); - std::unique_ptr> check(storm::logic::Formula const& formula); + Result check(storm::logic::Formula const& formula); void printStatisticsToStream(std::ostream& stream) const; private: /** - * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop + * Helper method that handles the computation 
of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size * - * @param targetObservations the set of observations to be reached - * @param min true if minimum probability is to be computed - * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values + * @param targetObservations set of target observations + * @param min true if minimum value is to be computed + * @param observationResolutionVector vector containing the resolution to be used for each observation + * @param computeRewards true if rewards are to be computed, false if probability is computed + * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value + * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value + * @param maxUaModelSize the maximum size of the underapproximation model to be generated + * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ - std::unique_ptr> - refineReachability(std::set const &targetObservations, bool min, bool computeRewards); - + void computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result); + + /** - * Compute the reachability probability of given target observations on a POMDP for the given resolution only. 
- * On-the-fly state space generation is used for the overapproximation + * Compute the reachability probability of given target observations on a POMDP using the automatic refinement loop * * @param targetObservations the set of observations to be reached * @param min true if minimum probability is to be computed - * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values + * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values */ - std::unique_ptr> - computeReachabilityProbabilityOTF(std::set const &targetObservations, bool min); + void refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result); /** - * Compute the reachability rewards for given target observations on a POMDP for the given resolution only. - * On-the-fly state space generation is used for the overapproximation - * - * @param targetObservations the set of observations to be reached - * @param min true if minimum rewards are to be computed - * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values + * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. 
provides an upper bound for maximizing and a lower bound for minimizing properties */ - std::unique_ptr> - computeReachabilityRewardOTF(std::set const &targetObservations, bool min); + std::shared_ptr computeOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, std::vector& observationResolutionVector, std::shared_ptr& beliefManager); + + void refineOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); - private: /** - * Helper method to compute the inital step of the refinement loop - * - * @param targetObservations set of target observations - * @param min true if minimum value is to be computed - * @param observationResolutionVector vector containing the resolution to be used for each observation - * @param computeRewards true if rewards are to be computed, false if probability is computed - * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value - * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value - * @param maxUaModelSize the maximum size of the underapproximation model to be generated - * @return struct containing components generated during the computation to be used in later refinement iterations + * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. 
provides a lower bound for maximizing and an upper bound for minimizing properties */ - std::shared_ptr> - computeFirstRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200); + std::shared_ptr computeUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr& beliefManager); - std::shared_ptr> - computeRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, - bool computeRewards, std::shared_ptr> refinementComponents, - std::set changedObservations, - boost::optional> overApproximationMap = boost::none, - boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); + void refineUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); - /** - * Helper method that handles the computation of reachability probabilities and rewards using the on-the-fly state space generation for a fixed grid size - * - * @param targetObservations set of target observations - * @param min true if minimum value is to be computed - * @param observationResolutionVector vector containing the resolution to be used for each observation - * @param computeRewards true if rewards are to be computed, false if probability is computed - * @param overApproximationMap optional mapping of original POMDP states to a naive overapproximation value - * @param underApproximationMap optional mapping of original POMDP states to a naive underapproximation value - * @param maxUaModelSize the maximum size of the underapproximation model to be generated - * @return A struct containing the 
overapproximation (overApproxValue) and underapproximation (underApproxValue) values - */ - std::unique_ptr> - computeReachabilityOTF(std::set const &targetObservations, bool min, bool computeRewards, - std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxUaModelSize = 200); +#ifdef REMOVE_THIS /** * Helper to compute an underapproximation of the reachability property. * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. @@ -243,7 +217,8 @@ namespace storm { */ ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief const& belief); ValueType getRewardAfterAction(uint64_t action, std::map const& belief); - +#endif //REMOVE_THIS + struct Statistics { Statistics(); boost::optional refinementSteps; @@ -262,7 +237,7 @@ namespace storm { }; Statistics statistics; - storm::models::sparse::Pomdp const& pomdp; + PomdpModelType const& pomdp; Options options; storm::utility::ConstantsComparator cc; }; From 5388ed98e3d56a379a9350b169db59f1d0509a41 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 1 Apr 2020 12:51:32 +0200 Subject: [PATCH 18/40] BeliefMdpExplorer: Added a few asserts so that methods can only be called in the corresponding exploration phase --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 38 +++++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 33e1d1f51..86f49fe02 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -29,12 +29,20 @@ namespace storm { typedef typename BeliefManagerType::BeliefId BeliefId; typedef uint64_t MdpStateType; - BeliefMdpExplorer(std::shared_ptr beliefManager, std::vector const& pomdpLowerValueBounds, std::vector const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), 
pomdpUpperValueBounds(pomdpUpperValueBounds) { + enum class Status { + Uninitialized, + Exploring, + ModelFinished, + ModelChecked + }; + + BeliefMdpExplorer(std::shared_ptr beliefManager, std::vector const& pomdpLowerValueBounds, std::vector const& pomdpUpperValueBounds) : beliefManager(beliefManager), pomdpLowerValueBounds(pomdpLowerValueBounds), pomdpUpperValueBounds(pomdpUpperValueBounds), status(Status::Uninitialized) { // Intentionally left empty } BeliefMdpExplorer(BeliefMdpExplorer&& other) = default; void startNewExploration(boost::optional extraTargetStateValue = boost::none, boost::optional extraBottomStateValue = boost::none) { + status = Status::Exploring; // Reset data from potential previous explorations mdpStateToBeliefIdMap.clear(); beliefIdToMdpStateMap.clear(); @@ -83,10 +91,12 @@ namespace storm { } bool hasUnexploredState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return !beliefIdsToExplore.empty(); } BeliefId exploreNextState() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); // Set up the matrix builder finishCurrentRow(); startOfCurrentRowGroup = currentRowCount; @@ -100,6 +110,7 @@ namespace storm { } void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero(), ValueType const& bottomStateValue = storm::utility::zero()) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); // We first insert the entries of the current row in a separate map. 
// This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) @@ -115,6 +126,7 @@ namespace storm { } void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one()) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); uint64_t row = startOfCurrentRowGroup + localActionIndex; internalAddTransition(row, getCurrentMdpState(), value); } @@ -128,6 +140,7 @@ namespace storm { * @return true iff a transition was actually inserted. False can only happen if ignoreNewBeliefs is true. */ bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); // We first insert the entries of the current row in a separate map. // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) MdpStateType column; @@ -145,6 +158,7 @@ namespace storm { } void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero()) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); if (currentRowCount >= mdpActionRewards.size()) { mdpActionRewards.resize(currentRowCount, storm::utility::zero()); } @@ -153,16 +167,19 @@ namespace storm { } void setCurrentStateIsTarget() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); targetStates.grow(getCurrentNumberOfMdpStates(), false); targetStates.set(getCurrentMdpState(), true); } void setCurrentStateIsTruncated() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); truncatedStates.grow(getCurrentNumberOfMdpStates(), false); truncatedStates.set(getCurrentMdpState(), true); } void finishExploration() { + STORM_LOG_ASSERT(status == 
Status::Exploring, "Method call is invalid in current status."); // Create the tranistion matrix finishCurrentRow(); auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates()); @@ -185,26 +202,32 @@ namespace storm { storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); exploredMdp = std::make_shared>(std::move(modelComponents)); + status = Status::ModelFinished; } std::shared_ptr> getExploredMdp() const { + STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); return exploredMdp; } MdpStateType getCurrentNumberOfMdpStates() const { + STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); return mdpStateToBeliefIdMap.size(); } MdpStateType getCurrentNumberOfMdpChoices() const { + STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); return currentRowCount; } ValueType getLowerValueBoundAtCurrentState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return lowerValueBounds[getCurrentMdpState()]; } ValueType getUpperValueBoundAtCurrentState() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return upperValueBounds[getCurrentMdpState()]; } @@ -216,7 +239,8 @@ namespace storm { return beliefManager->getWeightedSum(beliefId, pomdpUpperValueBounds); } - std::vector const& computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) { + void computeValuesOfExploredMdp(storm::solver::OptimizationDirection const& dir) { + STORM_LOG_ASSERT(status == Status::ModelFinished, "Method call is invalid in current status."); 
STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored"); auto property = createStandardProperty(dir, exploredMdp->hasRewardModel()); auto task = createStandardCheckTask(property); @@ -228,12 +252,18 @@ namespace storm { STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!"); STORM_LOG_ERROR("No result obtained while checking."); } + status = Status::ModelChecked; + } + + std::vector const& getValuesOfExploredMdp() const { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); return values; } ValueType const& getComputedValueAtInitialState() const { + STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored."); - return values[exploredMdp->getInitialStates().getNextSetIndex(0)]; + return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)]; } private: @@ -355,6 +385,8 @@ namespace storm { std::vector upperValueBounds; std::vector values; // Contains an estimate during building and the actual result after a check has performed + // The current status of this explorer + Status status; }; } } \ No newline at end of file From 79641ef1310d99a187fba3083fba9c3c6dddfb26 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 1 Apr 2020 15:59:31 +0200 Subject: [PATCH 19/40] Started to make the BeliefMdpExplorer more flexible, allowing to restart the exploration --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 224 ++++++++++++++------ src/storm-pomdp/storage/BeliefManager.h | 4 +- 2 files changed, 162 insertions(+), 66 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 86f49fe02..e13e20cf3 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -11,6 +11,7 @@ #include "storm/api/verification.h" #include 
"storm/storage/BitVector.h" +#include "storm/storage/SparseMatrix.h" #include "storm/utility/macros.h" #include "storm-pomdp/storage/BeliefManager.h" #include "storm/utility/SignalHandler.h" @@ -19,6 +20,7 @@ #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" #include "storm/modelchecker/hints/ExplicitModelCheckerHint.cpp" + namespace storm { namespace builder { template @@ -46,16 +48,17 @@ namespace storm { // Reset data from potential previous explorations mdpStateToBeliefIdMap.clear(); beliefIdToMdpStateMap.clear(); - beliefIdsWithMdpState.clear(); - beliefIdsWithMdpState.grow(beliefManager->getNumberOfBeliefIds(), false); + exploredBeliefIds.clear(); + exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); + mdpStatesToExplore.clear(); lowerValueBounds.clear(); upperValueBounds.clear(); values.clear(); - mdpTransitionsBuilder = storm::storage::SparseMatrixBuilder(0, 0, 0, true, true); - currentRowCount = 0; - startOfCurrentRowGroup = 0; + exploredMdpTransitions.clear(); + exploredChoiceIndices.clear(); mdpActionRewards.clear(); exploredMdp = nullptr; + currentMdpState = noState(); // Add some states with special treatment (if requested) if (extraBottomStateValue) { @@ -63,10 +66,8 @@ namespace storm { mdpStateToBeliefIdMap.push_back(beliefManager->noId()); insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get()); - startOfCurrentRowGroup = currentRowCount; - mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); - mdpTransitionsBuilder.addNextValue(currentRowCount, extraBottomState.get(), storm::utility::one()); - ++currentRowCount; + internalAddRowGroupIndex(); + internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one()); } else { extraBottomState = boost::none; } @@ -75,10 +76,8 @@ namespace storm { mdpStateToBeliefIdMap.push_back(beliefManager->noId()); insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get()); - startOfCurrentRowGroup = 
currentRowCount; - mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); - mdpTransitionsBuilder.addNextValue(currentRowCount, extraTargetState.get(), storm::utility::one()); - ++currentRowCount; + internalAddRowGroupIndex(); + internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one()); targetStates.grow(getCurrentNumberOfMdpStates(), false); targetStates.set(extraTargetState.get(), true); @@ -89,24 +88,62 @@ namespace storm { // Set up the initial state. initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); } + + /*! + * Restarts the exploration to allow re-exploring each state. + * After calling this, the "currently explored" MDP has the same number of states and choices as the "old" one, but the choices are still empty + * This method inserts the initial state of the MDP in the exploration queue. + * While re-exploring, the reference to the old MDP remains valid. + */ + void restartExploration() { + STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status."); + // We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid. 
+ exploredBeliefIds.clear(); + exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); + exploredMdpTransitions.clear(); + exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices); + exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices(); + mdpActionRewards.clear(); + if (exploredMdp->hasRewardModel()) { + // Can be overwritten during exploration + mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector(); + } + targetStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); + truncatedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false); + mdpStatesToExplore.clear(); + + // The extra states are not changed + if (extraBottomState) { + currentMdpState = extraBottomState.get(); + restoreOldBehaviorAtCurrentState(0); + } + if (extraTargetState) { + currentMdpState = extraTargetState.get(); + restoreOldBehaviorAtCurrentState(0); + } + currentMdpState = noState(); + + // Set up the initial state. + initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); + } bool hasUnexploredState() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - return !beliefIdsToExplore.empty(); + return !mdpStatesToExplore.empty(); } BeliefId exploreNextState() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - // Set up the matrix builder - finishCurrentRow(); - startOfCurrentRowGroup = currentRowCount; - mdpTransitionsBuilder.newRowGroup(startOfCurrentRowGroup); - ++currentRowCount; // Pop from the queue. 
- auto result = beliefIdsToExplore.front(); - beliefIdsToExplore.pop_front(); - return result; + currentMdpState = mdpStatesToExplore.front(); + mdpStatesToExplore.pop_front(); + + if (!currentStateHasOldBehavior()) { + internalAddRowGroupIndex(); + } + + return mdpStateToBeliefIdMap[currentMdpState]; } void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero(), ValueType const& bottomStateValue = storm::utility::zero()) { @@ -114,7 +151,7 @@ namespace storm { // We first insert the entries of the current row in a separate map. // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) - uint64_t row = startOfCurrentRowGroup + localActionIndex; + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; if (!storm::utility::isZero(bottomStateValue)) { STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); internalAddTransition(row, extraBottomState.get(), bottomStateValue); @@ -127,7 +164,7 @@ namespace storm { void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one()) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - uint64_t row = startOfCurrentRowGroup + localActionIndex; + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; internalAddTransition(row, getCurrentMdpState(), value); } @@ -145,24 +182,24 @@ namespace storm { // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) MdpStateType column; if (ignoreNewBeliefs) { - column = getMdpState(transitionTarget); + column = getExploredMdpState(transitionTarget); if (column == noState()) { return false; } } else { column = getOrAddMdpState(transitionTarget); } - uint64_t row = startOfCurrentRowGroup + localActionIndex; + uint64_t row = getStartOfCurrentRowGroup() 
+ localActionIndex; internalAddTransition(row, column, value); return true; } void computeRewardAtCurrentState(uint64 const& localActionIndex, ValueType extraReward = storm::utility::zero()) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - if (currentRowCount >= mdpActionRewards.size()) { - mdpActionRewards.resize(currentRowCount, storm::utility::zero()); + if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) { + mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); } - uint64_t row = startOfCurrentRowGroup + localActionIndex; + uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward; } @@ -178,11 +215,64 @@ namespace storm { truncatedStates.set(getCurrentMdpState(), true); } + bool currentStateHasOldBehavior() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); + } + + /*! + * Inserts transitions and rewards at the given action as in the MDP of the previous exploration. + * Does NOT set whether the state is truncated and/or target. 
+ * Will add "old" states that have not been considered before into the exploration queue + * @param localActionIndex + */ + void restoreOldBehaviorAtCurrentState(uint64_t const& localActionIndex) { + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any."); + uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex; + STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index."); + + // Insert the transitions + for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) { + internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue()); + // Check whether exploration is needed + auto beliefId = mdpStateToBeliefIdMap[transition.getColumn()]; + if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state + if (!exploredBeliefIds.get(beliefId)) { + // This belief needs exploration + exploredBeliefIds.set(beliefId, true); + mdpStatesToExplore.push_back(transition.getColumn()); + } + } + } + + // Actually, nothing needs to be done for rewards since we already initialize the vector with the "old" values + } + void finishExploration() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); + // Finish the last row grouping in case the last explored state was new + if (!currentStateHasOldBehavior()) { + internalAddRowGroupIndex(); + } + // Create the tranistion matrix - finishCurrentRow(); - auto mdpTransitionMatrix = mdpTransitionsBuilder.build(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), getCurrentNumberOfMdpStates()); + uint64_t entryCount = 0; + for (auto const& row : exploredMdpTransitions) { + entryCount += row.size(); + } + storm::storage::SparseMatrixBuilder 
builder(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), entryCount, true, true, getCurrentNumberOfMdpStates()); + for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { + uint64_t rowIndex = exploredChoiceIndices[groupIndex]; + uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; + builder.newRowGroup(rowIndex); + for (; rowIndex < groupEnd; ++rowIndex) { + for (auto const& entry : exploredMdpTransitions[rowIndex]) { + builder.addNextValue(rowIndex, entry.first, entry.second); + } + } + } + auto mdpTransitionMatrix = builder.build(); // Create a standard labeling storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates()); @@ -212,13 +302,18 @@ namespace storm { } MdpStateType getCurrentNumberOfMdpStates() const { - STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return mdpStateToBeliefIdMap.size(); } MdpStateType getCurrentNumberOfMdpChoices() const { - STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); - return currentRowCount; + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return exploredMdpTransitions.size(); + } + + MdpStateType getStartOfCurrentRowGroup() const { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return exploredChoiceIndices.back(); } ValueType getLowerValueBoundAtCurrentState() const { @@ -291,7 +386,8 @@ namespace storm { } MdpStateType getCurrentMdpState() const { - return mdpTransitionsBuilder.getCurrentRowGroupCount() - 1; + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return currentMdpState; } MdpStateType getCurrentBeliefId() const { @@ -299,27 +395,20 @@ namespace storm { } void internalAddTransition(uint64_t const& row, MdpStateType 
const& column, ValueType const& value) { - // We first insert the entries of the current row in a separate map. - // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) - STORM_LOG_ASSERT(row >= currentRowCount - 1, "Trying to insert in an already completed row."); - if (row >= currentRowCount) { - // We are going to start a new row, so insert the entries of the old one - finishCurrentRow(); - currentRowCount = row + 1; + STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(), "Skipped at least one row."); + if (row == exploredMdpTransitions.size()) { + exploredMdpTransitions.emplace_back(); } - STORM_LOG_ASSERT(mdpTransitionsBuilderCurrentRowEntries.count(column) == 0, "Trying to insert multiple transitions to the same state."); - mdpTransitionsBuilderCurrentRowEntries[column] = value; + STORM_LOG_ASSERT(exploredMdpTransitions[row].count(column) == 0, "Trying to insert multiple transitions to the same state."); + exploredMdpTransitions[row][column] = value; } - void finishCurrentRow() { - for (auto const& entry : mdpTransitionsBuilderCurrentRowEntries) { - mdpTransitionsBuilder.addNextValue(currentRowCount - 1, entry.first, entry.second); - } - mdpTransitionsBuilderCurrentRowEntries.clear(); + void internalAddRowGroupIndex() { + exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices()); } - MdpStateType getMdpState(BeliefId const& beliefId) const { - if (beliefId < beliefIdsWithMdpState.size() && beliefIdsWithMdpState.get(beliefId)) { + MdpStateType getExploredMdpState(BeliefId const& beliefId) const { + if (beliefId < exploredBeliefIds.size() && exploredBeliefIds.get(beliefId)) { return beliefIdToMdpStateMap.at(beliefId); } else { return noState(); @@ -336,20 +425,28 @@ namespace storm { } MdpStateType getOrAddMdpState(BeliefId const& beliefId) { - beliefIdsWithMdpState.grow(beliefId + 1, false); - if (beliefIdsWithMdpState.get(beliefId)) { + exploredBeliefIds.grow(beliefId + 1, false); + if 
(exploredBeliefIds.get(beliefId)) { return beliefIdToMdpStateMap[beliefId]; } else { - // Add a new MDP state - beliefIdsWithMdpState.set(beliefId, true); + // This state needs exploration + exploredBeliefIds.set(beliefId, true); + + // If this is a restart of the exploration, we still might have an MDP state for the belief + if (exploredMdp) { + auto findRes = beliefIdToMdpStateMap.find(beliefId); + if (findRes != beliefIdToMdpStateMap.end()) { + mdpStatesToExplore.push_back(findRes->second); + return findRes->second; + } + } + // At this poind we need to add a new MDP state MdpStateType result = getCurrentNumberOfMdpStates(); assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size()); mdpStateToBeliefIdMap.push_back(beliefId); beliefIdToMdpStateMap[beliefId] = result; - // This new belief needs exploration - beliefIdsToExplore.push_back(beliefId); - insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId)); + mdpStatesToExplore.push_back(result); return result; } } @@ -358,15 +455,14 @@ namespace storm { std::shared_ptr beliefManager; std::vector mdpStateToBeliefIdMap; std::map beliefIdToMdpStateMap; - storm::storage::BitVector beliefIdsWithMdpState; + storm::storage::BitVector exploredBeliefIds; // Exploration information - std::deque beliefIdsToExplore; - storm::storage::SparseMatrixBuilder mdpTransitionsBuilder; - std::map mdpTransitionsBuilderCurrentRowEntries; + std::deque mdpStatesToExplore; + std::vector> exploredMdpTransitions; + std::vector exploredChoiceIndices; std::vector mdpActionRewards; - uint64_t startOfCurrentRowGroup; - uint64_t currentRowCount; + uint64_t currentMdpState; // Special states during exploration boost::optional extraTargetState; diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 9cb7c039c..8f0dcd225 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -324,8 +324,8 @@ namespace 
storm { } std::map expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { - std::map destinations; // The belief ids should be ordered - // TODO: Does this make sense? It could be better to order them afterwards because now we rely on the fact that MDP states have the same order than their associated BeliefIds + std::map destinations; + // TODO: Output as vector? BeliefType belief = getBelief(beliefId); From 3041b881d44eb4e53f8c4a3dda7817456eadef81 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 1 Apr 2020 22:34:47 +0200 Subject: [PATCH 20/40] Beginning of dropUnreachableStates() --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 41 +++++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index e13e20cf3..bb53c61c6 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -287,20 +287,53 @@ namespace storm { std::unordered_map> mdpRewardModels; if (!mdpActionRewards.empty()) { mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); - mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(mdpActionRewards))); + mdpRewardModels.emplace("default", + storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(mdpActionRewards))); } - + storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); exploredMdp = std::make_shared>(std::move(modelComponents)); status = Status::ModelFinished; } - + + void dropUnreachableStates() { + STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); + storm::storage::BitVector reachableStates = 
storm::utility::graph::getReachableStates(getExploredMdp()->getTransitionMatrix(), + storm::storage::BitVector(getCurrentNumberOfMdpStates(), {initialMdpState}), + storm::storage::BitVector(getCurrentNumberOfMdpStates(), true), targetStates); + auto reachableTransitionMatrix = getExploredMdp()->getTransitionMatrix().getSubmatrix(true, reachableStates, reachableStates); + auto reachableStateLabeling = getExploredMdp()->getStateLabeling().getSubLabeling(reachableStates); + // TODO reward model + storm::storage::sparse::ModelComponents modelComponents(std::move(reachableTransitionMatrix), std::move(reachableStateLabeling)); + exploredMdp = std::make_shared>(std::move(modelComponents)); + + std::vector reachableMdpStateToBeliefIdMap(reachableStates.getNumberOfSetBits()); + std::vector reachableLowerValueBounds(reachableStates.getNumberOfSetBits()); + std::vector reachableUpperValueBounds(reachableStates.getNumberOfSetBits()); + std::vector reachableValues(reachableStates.getNumberOfSetBits()); + for (uint64_t state = 0; state < reachableStates.size(); ++state) { + if (reachableStates[state]) { + reachableMdpStateToBeliefIdMap.push_back(mdpStateToBeliefIdMap[state]); + reachableLowerValueBounds.push_back(lowerValueBounds[state]); + reachableUpperValueBounds.push_back(upperValueBounds[state]); + reachableValues.push_back(values[state]); + } + //TODO drop BeliefIds from exploredBeliefIDs? 
+ } + std::map reachableBeliefIdToMdpStateMap; + for (MdpStateType state = 0; state < reachableMdpStateToBeliefIdMap.size(); ++state) { + reachableBeliefIdToMdpStateMap[reachableMdpStateToBeliefIdMap[state]] = state; + } + mdpStateToBeliefIdMap = reachableMdpStateToBeliefIdMap; + beliefIdToMdpStateMap = reachableBeliefIdToMdpStateMap; + } + std::shared_ptr> getExploredMdp() const { STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet."); return exploredMdp; } - + MdpStateType getCurrentNumberOfMdpStates() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); return mdpStateToBeliefIdMap.size(); From 62c905fc583ef215d3dcd885181f0d99f816a5bc Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Thu, 2 Apr 2020 20:05:00 +0200 Subject: [PATCH 21/40] Added basis for rewards in dropUnreachableStates() --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 28 +++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index bb53c61c6..426eff188 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -298,28 +298,42 @@ namespace storm { void dropUnreachableStates() { STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); - storm::storage::BitVector reachableStates = storm::utility::graph::getReachableStates(getExploredMdp()->getTransitionMatrix(), - storm::storage::BitVector(getCurrentNumberOfMdpStates(), {initialMdpState}), - storm::storage::BitVector(getCurrentNumberOfMdpStates(), true), targetStates); + auto reachableStates = storm::utility::graph::getReachableStates(getExploredMdp()->getTransitionMatrix(), + 
storm::storage::BitVector(getCurrentNumberOfMdpStates(), std::vector{initialMdpState}), + storm::storage::BitVector(getCurrentNumberOfMdpStates(), true), + getExploredMdp()->getStateLabeling().getStates("target")); auto reachableTransitionMatrix = getExploredMdp()->getTransitionMatrix().getSubmatrix(true, reachableStates, reachableStates); auto reachableStateLabeling = getExploredMdp()->getStateLabeling().getSubLabeling(reachableStates); - // TODO reward model - storm::storage::sparse::ModelComponents modelComponents(std::move(reachableTransitionMatrix), std::move(reachableStateLabeling)); - exploredMdp = std::make_shared>(std::move(modelComponents)); - std::vector reachableMdpStateToBeliefIdMap(reachableStates.getNumberOfSetBits()); std::vector reachableLowerValueBounds(reachableStates.getNumberOfSetBits()); std::vector reachableUpperValueBounds(reachableStates.getNumberOfSetBits()); std::vector reachableValues(reachableStates.getNumberOfSetBits()); + std::vector reachableMdpActionRewards; for (uint64_t state = 0; state < reachableStates.size(); ++state) { if (reachableStates[state]) { reachableMdpStateToBeliefIdMap.push_back(mdpStateToBeliefIdMap[state]); reachableLowerValueBounds.push_back(lowerValueBounds[state]); reachableUpperValueBounds.push_back(upperValueBounds[state]); reachableValues.push_back(values[state]); + if (getExploredMdp()->hasRewardModel()) { + //TODO FIXME is there some mismatch with the indices here? + for (uint64_t i = 0; i < getExploredMdp()->getTransitionMatrix().getRowGroupSize(state); ++i) { + reachableMdpActionRewards.push_back(getExploredMdp()->getUniqueRewardModel().getStateActionRewardVector()[state + i]); + } + } } //TODO drop BeliefIds from exploredBeliefIDs? 
} + std::unordered_map> mdpRewardModels; + if (!reachableMdpActionRewards.empty()) { + //reachableMdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); + mdpRewardModels.emplace("default", + storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(reachableMdpActionRewards))); + } + storm::storage::sparse::ModelComponents modelComponents(std::move(reachableTransitionMatrix), std::move(reachableStateLabeling), + std::move(mdpRewardModels)); + exploredMdp = std::make_shared>(std::move(modelComponents)); + std::map reachableBeliefIdToMdpStateMap; for (MdpStateType state = 0; state < reachableMdpStateToBeliefIdMap.size(); ++state) { reachableBeliefIdToMdpStateMap[reachableMdpStateToBeliefIdMap[state]] = state; From c2ddea14806cfabc81e1b956ced220129e6f463b Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 3 Apr 2020 12:41:55 +0200 Subject: [PATCH 22/40] First (re-) implementation of refinement. (probably needs some testing/debugging) --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 60 +- .../ApproximatePOMDPModelchecker.cpp | 712 +++++------------- .../ApproximatePOMDPModelchecker.h | 141 +--- 3 files changed, 267 insertions(+), 646 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 426eff188..2a97c5e05 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -43,6 +43,10 @@ namespace storm { } BeliefMdpExplorer(BeliefMdpExplorer&& other) = default; + BeliefManagerType const& getBeliefManager() const { + return *beliefManager; + } + void startNewExploration(boost::optional extraTargetStateValue = boost::none, boost::optional extraBottomStateValue = boost::none) { status = Status::Exploring; // Reset data from potential previous explorations @@ -101,7 +105,7 @@ namespace storm { exploredBeliefIds.clear(); exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); 
exploredMdpTransitions.clear(); - exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices); + exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices()); exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices(); mdpActionRewards.clear(); if (exploredMdp->hasRewardModel()) { @@ -235,7 +239,7 @@ namespace storm { for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) { internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue()); // Check whether exploration is needed - auto beliefId = mdpStateToBeliefIdMap[transition.getColumn()]; + auto beliefId = getBeliefId(transition.getColumn()); if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state if (!exploredBeliefIds.get(beliefId)) { // This belief needs exploration @@ -397,6 +401,10 @@ namespace storm { status = Status::ModelChecked; } + bool hasComputedValues() const { + return status == Status::ModelChecked; + } + std::vector const& getValuesOfExploredMdp() const { STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); return values; @@ -408,6 +416,51 @@ namespace storm { return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)]; } + MdpStateType getBeliefId(MdpStateType exploredMdpState) const { + STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); + return mdpStateToBeliefIdMap[exploredMdpState]; + } + + struct SuccessorObservationInformation { + SuccessorObservationInformation(ValueType const& obsProb, ValueType const& maxProb, uint64_t const& count) : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) { + // Intentionally left empty. 
+ } + + void join(SuccessorObservationInformation other) { + observationProbability += other.observationProbability; + maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs); + successorWithObsCount += other.successorWithObsCount; + } + + ValueType observationProbability; /// The probability we move to the corresponding observation. + ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation. + uint64_t successorWithObsCount; /// The number of successors with this observation + }; + + void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map gatheredSuccessorObservations) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); + uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; + gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + } + + void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map gatheredSuccessorObservations) { + STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before"); + for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { + auto const& beliefId = getBeliefId(entry.getColumn()); + if (beliefId != beliefManager->noId()) { + auto const& obs = beliefManager->getBeliefObservation(beliefId); + SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1); + auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info); + if (!obsInsertion.second) { + // There already is an entry for this observation, so join the two informations + obsInsertion.first->second.join(info); + } + } + } + } + + private: MdpStateType noState() const { return std::numeric_limits::max(); @@ 
-438,7 +491,8 @@ namespace storm { } MdpStateType getCurrentBeliefId() const { - return mdpStateToBeliefIdMap[getCurrentMdpState()]; + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return getBeliefId(getCurrentMdpState()); } void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 436fc3e09..4526607d4 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -171,8 +171,9 @@ namespace storm { if (rewardModelName) { manager->setRewardModel(rewardModelName); } - auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager); - if (approx) { + auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, manager, approx); + if (approx->hasComputedValues()) { STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n"); approx->getExploredMdp()->printModelInformationToStream(std::cout); ValueType& resultValue = min ? 
result.lowerBound : result.upperBound; @@ -185,8 +186,9 @@ namespace storm { if (rewardModelName) { manager->setRewardModel(rewardModelName); } - auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager); - if (approx) { + auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx); + if (approx->hasComputedValues()) { STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); approx->getExploredMdp()->printModelInformationToStream(std::cout); ValueType& resultValue = min ? result.upperBound : result.lowerBound; @@ -200,23 +202,27 @@ namespace storm { // Set up exploration data std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - auto beliefManager = std::make_shared(pomdp, options.numericPrecision); + auto overApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); + auto underApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); if (rewardModelName) { - beliefManager->setRewardModel(rewardModelName); + overApproxBeliefManager->setRewardModel(rewardModelName); + underApproxBeliefManager->setRewardModel(rewardModelName); } // OverApproximaion - auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager); - if (!overApproximation) { + auto overApproximation = std::make_shared(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (!overApproximation->hasComputedValues()) 
{ return; } ValueType& overApproxValue = min ? result.lowerBound : result.upperBound; overApproxValue = overApproximation->getComputedValueAtInitialState(); - // UnderApproximation TODO: use same belief manager?) - uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates(); - auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager); - if (!underApproximation) { + // UnderApproximation + uint64_t underApproxSizeThreshold = std::max(overApproximation->getExploredMdp()->getNumberOfStates(), 10); + auto underApproximation = std::make_shared(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); + if (!underApproximation->hasComputedValues()) { return; } ValueType& underApproxValue = min ? 
result.upperBound : result.lowerBound; @@ -225,379 +231,165 @@ namespace storm { // ValueType lastMinScore = storm::utility::infinity(); // Start refinement statistics.refinementSteps = 0; + ValueType refinementAggressiveness = storm::utility::zero(); while (result.diff() > options.refinementPrecision) { if (storm::utility::resources::isTerminate()) { break; } - // TODO the actual refinement - /* - // choose which observation(s) to refine - std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); - std::vector beliefCount(pomdp.getNrObservations(), 0); - bsmap_type::right_map::const_iterator underApproxStateBeliefIter = res->underApproxBeliefStateMap.right.begin(); - while (underApproxStateBeliefIter != res->underApproxBeliefStateMap.right.end()) { - auto currentBelief = res->beliefList[underApproxStateBeliefIter->second]; - beliefCount[currentBelief.observation] += 1; - bsmap_type::left_const_iterator overApproxBeliefStateIter = res->overApproxBeliefStateMap.left.find(underApproxStateBeliefIter->second); - if (overApproxBeliefStateIter != res->overApproxBeliefStateMap.left.end()) { - // If there is an over-approximate value for the belief, use it - auto diff = res->overApproxMap[overApproxBeliefStateIter->second] - res->underApproxMap[underApproxStateBeliefIter->first]; - obsAccumulator[currentBelief.observation] += diff; - } else { - //otherwise, we approximate a value TODO this is critical, we have to think about it - auto overApproxValue = storm::utility::zero(); - auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation], pomdp.getNumberOfStates()); - auto subSimplex = temp.first; - auto lambdas = temp.second; - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - uint64_t approxId = getBeliefIdInVector(res->beliefList, currentBelief.observation, subSimplex[j]); - bsmap_type::left_const_iterator approxIter = 
res->overApproxBeliefStateMap.left.find(approxId); - if (approxIter != res->overApproxBeliefStateMap.left.end()) { - overApproxValue += lambdas[j] * res->overApproxMap[approxIter->second]; - } else { - overApproxValue += lambdas[j]; - } - } - } - obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[underApproxStateBeliefIter->first]; - } - ++underApproxStateBeliefIter; - } - - - //for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { - // obsAccumulator[i] /= storm::utility::convertNumber(beliefCount[i]); - //} - changedObservations.clear(); - - //TODO think about some other scoring methods - auto maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end()); - //if (cc.isEqual(maxAvgDifference, lastMinScore) || cc.isLess(lastMinScore, maxAvgDifference)) { - lastMinScore = maxAvgDifference; - auto maxRes = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); - STORM_PRINT("Set all to " << maxRes + 1 << std::endl) - for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { - observationResolutionVector[i] = maxRes + 1; - changedObservations.insert(i); + + // Refine over-approximation + refinementAggressiveness *= storm::utility::convertNumber(1.1);; + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, &refinementAggressiveness, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (overApproximation->hasComputedValues()) { + overApproxValue = overApproximation->getComputedValueAtInitialState(); + } else { + break; } - //} else { - // lastMinScore = std::min(maxAvgDifference, lastMinScore); - // STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) - // STORM_PRINT("Last Min Score: " << lastMinScore << std::endl) - // //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl) - // for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { - //STORM_PRINT(i << 
"(" << beliefCount[i] << "): " << obsAccumulator[i]) - // if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) { - //STORM_PRINT(" *** ") - // observationResolutionVector[i] += 1; - // changedObservations.insert(i); - // } - //STORM_PRINT(std::endl) - // } - //} - if (underApproxModelSize < std::numeric_limits::max() - 101) { - underApproxModelSize += 100; + + // Refine under-approximation + underApproxSizeThreshold *= storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + refinementAggressiveness)); + underApproxSizeThreshold = std::max(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates()); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); + if (underApproximation->hasComputedValues()) { + underApproxValue = underApproximation->getComputedValueAtInitialState(); + } else { + break; } - STORM_PRINT( - "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl) - res = computeRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, - res, changedObservations, initialOverApproxMap, initialUnderApproxMap, underApproxModelSize); - //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot"); - STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) || - cc.isEqual(res->underApproxValue, res->overApproxValue), - "The value for the under-approximation is larger than the value for the over-approximation."); - */ ++statistics.refinementSteps.get(); } } + /*! + * Heuristically rates the quality of the approximation described by the given successor observation info. 
+ * Here, 0 means a bad approximation and 1 means a good approximation. + */ + template + typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info) { + auto n = storm::utility::convertNumber(info.successorWithObsCount); + auto one = storm::utility::one(); + + // Create the actual rating for this observation at this choice from the given info + ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; + // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 + // Normalize the rating so that it ranges from 0 to 1, where + // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. + obsChoiceRating = (obsChoiceRating * n - one) / (n - one); + return obsChoiceRating; + } + + template + std::vector::ValueType> ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation) { + uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices(); + + std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); + + std::map gatheredSuccessorObservations; // Declare here to avoid reallocations + for (uint64_t mdpChoice = 0; mdpChoice < numMdpChoices; ++mdpChoice) { + gatheredSuccessorObservations.clear(); + overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + for (auto const& obsInfo : gatheredSuccessorObservations) { + auto const& obs = obsInfo.first; + ValueType obsChoiceRating = rateObservation(obsInfo.second); + + // The rating of the observation will be the minimum over all choice-based observation ratings + resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating); + } + } + return resultingRatings; + } + template - std::shared_ptr::ExplorerType> 
ApproximatePOMDPModelchecker::computeOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, std::vector& observationResolutionVector, std::shared_ptr& beliefManager) { + void ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given"); + STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero(), "Can not refine with negative aggressiveness."); + STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one(), "Refinement with aggressiveness > 1 is invalid."); + statistics.overApproximationBuildTime.start(); - storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); - if (computeRewards) { - explorer.startNewExploration(storm::utility::zero()); + storm::storage::BitVector refinedObservations; + if (!refine) { + // If we build the model from scratch, we first have to setup the explorer for the overApproximation. + if (computeRewards) { + overApproximation->startNewExploration(storm::utility::zero()); + } else { + overApproximation->startNewExploration(storm::utility::one(), storm::utility::zero()); + } } else { - explorer.startNewExploration(storm::utility::one(), storm::utility::zero()); + // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement. 
+ auto obsRatings = getObservationRatings(overApproximation); + ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end()); + // Potentially increase the aggressiveness so that at least one observation actually gets refinement. + *refinementAggressiveness = std::max(minRating, *refinementAggressiveness); + refinedObservations = storm::utility::vector::filter(obsRatings, [&refinementAggressiveness](ValueType const& r) { return r <= *refinementAggressiveness;}); + STORM_PRINT("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); + for (auto const& obs : refinedObservations) { + // Heuristically increment the resolution at the refined observations (also based on the refinementAggressiveness) + ValueType incrementValue = storm::utility::one() + (*refinementAggressiveness) * storm::utility::convertNumber(observationResolutionVector[obs]); + observationResolutionVector[obs] += storm::utility::convertNumber(storm::utility::ceil(incrementValue)); + } + overApproximation->restartExploration(); } - // Expand the beliefs to generate the grid on-the-fly - while (explorer.hasUnexploredState()) { - uint64_t currId = explorer.exploreNextState(); + // Start exploration + std::map gatheredSuccessorObservations; // Declare here to avoid reallocations + while (overApproximation->hasUnexploredState()) { + uint64_t currId = overApproximation->exploreNextState(); uint32_t currObservation = beliefManager->getBeliefObservation(currId); if (targetObservations.count(currObservation) != 0) { - explorer.setCurrentStateIsTarget(); - explorer.addSelfloopTransition(); + overApproximation->setCurrentStateIsTarget(); + overApproximation->addSelfloopTransition(); } else { bool stopExploration = false; - if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + if 
(storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { stopExploration = true; - explorer.setCurrentStateIsTruncated(); + overApproximation->setCurrentStateIsTruncated(); } for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { - ValueType truncationProbability = storm::utility::zero(); - ValueType truncationValueBound = storm::utility::zero(); - auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); - for (auto const& successor : successorGridPoints) { - bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration); - if (!added) { - STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); - // We did not explore this successor state. Get a bound on the "missing" value - truncationProbability += successor.second; - truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first)); - } - } - if (stopExploration) { - if (computeRewards) { - explorer.addTransitionsToExtraStates(action, truncationProbability); - } else { - explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); - } - } - if (computeRewards) { - // The truncationValueBound will be added on top of the reward introduced by the current belief state. 
- explorer.computeRewardAtCurrentState(action, truncationValueBound); - } - } - } - if (storm::utility::resources::isTerminate()) { - statistics.overApproximationBuildAborted = true; - break; - } - } - statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates(); - if (storm::utility::resources::isTerminate()) { - statistics.overApproximationBuildTime.stop(); - return nullptr; - } - - explorer.finishExploration(); - statistics.overApproximationBuildTime.stop(); - - statistics.overApproximationCheckTime.start(); - explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); - statistics.overApproximationCheckTime.stop(); - - return std::make_shared(std::move(explorer)); - } - - template - void ApproximatePOMDPModelchecker::refineOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { - /*TODO: - template - std::shared_ptr> - ApproximatePOMDPModelchecker::computeRefinementStep(std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, - bool computeRewards, - std::shared_ptr> refinementComponents, - std::set changedObservations, - boost::optional> overApproximationMap, - boost::optional> underApproximationMap, - uint64_t maxUaModelSize) { - bool initialBoundMapsSet = overApproximationMap && underApproximationMap; - std::map initialOverMap; - std::map initialUnderMap; - if (initialBoundMapsSet) { - initialOverMap = overApproximationMap.value(); - initialUnderMap = underApproximationMap.value(); - } - // Note that a persistent cache is not support by the current data structure. 
The resolution for the given belief also has to be stored somewhere to cache effectively - std::map>> subSimplexCache; - std::map> lambdaCache; - - // Map to save the weighted values resulting from the initial preprocessing for newly added beliefs / indices in beliefSpace - std::map weightedSumOverMap; - std::map weightedSumUnderMap; - - statistics.overApproximationBuildTime.start(); - - uint64_t nextBeliefId = refinementComponents->beliefList.size(); - uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates(); - std::set relevantStates; // The MDP states where the observation has changed - for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { - auto currentBelief = refinementComponents->beliefList[iter.first]; - if (changedObservations.find(currentBelief.observation) != changedObservations.end()) { - relevantStates.insert(iter.second); - } - } - - std::set> statesAndActionsToCheck; // The predecessors of states where the observation has changed - for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) { - for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) { - for (typename storm::storage::SparseMatrix::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow( - state, row).begin(); - itEntry != refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(state, row).end(); ++itEntry) { - if (relevantStates.find(itEntry->getColumn()) != relevantStates.end()) { - statesAndActionsToCheck.insert(std::make_pair(state, row)); - break; - } - } - } - } - - std::deque beliefsToBeExpanded; - - std::map, std::map> transitionsStateActionPair; - for (auto const &stateActionPair : statesAndActionsToCheck) { - auto currId = refinementComponents->overApproxBeliefStateMap.right.at(stateActionPair.first); - auto action = stateActionPair.second; - std::map 
actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], - action); - std::map transitionInActionBelief; - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - // Expand and triangulate the successor - uint32_t observation = iter->first; - uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, - targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); - nextBeliefId = refinementComponents->beliefList.size(); - //Triangulate here and put the possibly resulting belief in the grid - std::vector> subSimplex; - std::vector lambdas; - //TODO add caching - if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, - observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], - pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - if (options.cacheSubsimplices) { - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; - } - } - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]); - if (approxId == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextBeliefId, observation, subSimplex[j]}; - refinementComponents->beliefList.push_back(gridBelief); - refinementComponents->beliefGrid.push_back(gridBelief); - 
refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - // compute overapproximate value using MDP result map - if (initialBoundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(initialOverMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(initialUnderMap[i]); - } - weightedSumOverMap[nextBeliefId] = tempWeightedSumOver; - weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder; + // Check whether we expand this state/action pair + // We always expand if we are not doing refinement of if the state was not available in the "old" MDP. + // Otherwise, a heuristic decides. + bool expandStateAction = true; + if (refine && overApproximation->currentStateHasOldBehavior()) { + // Compute a rating of the current state/action pair + ValueType stateActionRating = storm::utility::one(); + gatheredSuccessorObservations.clear(); + overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations); + for (auto const& obsInfo : gatheredSuccessorObservations) { + if (refinedObservations.get(obsInfo.first)) { + ValueType obsRating = rateObservation(obsInfo.second); + stateActionRating = std::min(stateActionRating, obsRating); } - beliefsToBeExpanded.push_back(nextBeliefId); - refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); - transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; - ++nextBeliefId; - ++nextStateId; - } else { - transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; } + // Only refine if this rating is below the doubled refinementAggressiveness + expandStateAction = stateActionRating < 
storm::utility::convertNumber(2.0) * (*refinementAggressiveness); } - } - } - if (!transitionInActionBelief.empty()) { - transitionsStateActionPair[stateActionPair] = transitionInActionBelief; - } - } - - std::set stoppedExplorationStateSet; - - // Expand newly added beliefs - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - bool isTarget = refinementComponents->beliefIsTarget[currId]; - - if (initialBoundMapsSet && - cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(options.explorationThreshold))) { - STORM_PRINT("Stop Exploration in State " << refinementComponents->overApproxBeliefStateMap.left.at(currId) << " with Value " << weightedSumOverMap[currId] - << std::endl) - transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = {{1, weightedSumOverMap[currId]}, - {0, storm::utility::one() - - weightedSumOverMap[currId]}}; - stoppedExplorationStateSet.insert(refinementComponents->overApproxBeliefStateMap.left.at(currId)); - continue; - } - - if (isTarget) { - // Depending on whether we compute rewards, we select the right initial result - // MDP stuff - transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = - {{refinementComponents->overApproxBeliefStateMap.left.at(currId), storm::utility::one()}}; - } else { - uint64_t representativeState = pomdp.getStatesWithObservation(refinementComponents->beliefList[currId].observation).front(); - uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); - std::vector actionRewardsInState(numChoices); - - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], action); - std::map transitionInActionBelief; - for (auto iter = 
actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, - targetObservations, refinementComponents->beliefList[currId], action, observation, - nextBeliefId); - nextBeliefId = refinementComponents->beliefList.size(); - //Triangulate here and put the possibly resulting belief in the grid - std::vector> subSimplex; - std::vector lambdas; - - if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, - observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], - pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - if (options.cacheSubsimplices) { - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; + if (expandStateAction) { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); + for (auto const& successor : successorGridPoints) { + bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? 
overApproximation->computeLowerValueBoundAtBelief(successor.first) : overApproximation->computeUpperValueBoundAtBelief(successor.first)); } } - - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]); - if (approxId == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextBeliefId, observation, subSimplex[j]}; - refinementComponents->beliefList.push_back(gridBelief); - refinementComponents->beliefGrid.push_back(gridBelief); - refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - // compute overapproximate value using MDP result map - if (initialBoundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(initialOverMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(initialUnderMap[i]); - } - weightedSumOverMap[nextBeliefId] = tempWeightedSumOver; - weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder; - } - beliefsToBeExpanded.push_back(nextBeliefId); - refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); - transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; - ++nextBeliefId; - ++nextStateId; - } else { - transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; - } + if (stopExploration) { + if (computeRewards) { + overApproximation->addTransitionsToExtraStates(action, truncationProbability); + } else { + overApproximation->addTransitionsToExtraStates(action, truncationValueBound, 
truncationProbability - truncationValueBound); } } - } - if (!transitionInActionBelief.empty()) { - transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), action)] = transitionInActionBelief; + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + overApproximation->computeRewardAtCurrentState(action, truncationValueBound); + } + } else { + // Do not refine here + overApproximation->restoreOldBehaviorAtCurrentState(action); } } } @@ -606,173 +398,85 @@ namespace storm { break; } } - - statistics.overApproximationStates = nextStateId; + // TODO: Drop unreachable states (sometimes?) + statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildTime.stop(); - // Return the result from the old refinement step - return refinementComponents; - } - storm::models::sparse::StateLabeling mdpLabeling(nextStateId); - mdpLabeling.addLabel("init"); - mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)); - mdpLabeling.addLabelToState("target", 1); - uint_fast64_t currentRow = 0; - uint_fast64_t currentRowGroup = 0; - storm::storage::SparseMatrixBuilder smb(0, nextStateId, 0, false, true); - auto oldTransitionMatrix = refinementComponents->overApproxModelPtr->getTransitionMatrix(); - smb.newRowGroup(currentRow); - smb.addNextValue(currentRow, 0, storm::utility::one()); - ++currentRow; - ++currentRowGroup; - smb.newRowGroup(currentRow); - smb.addNextValue(currentRow, 1, storm::utility::one()); - ++currentRow; - ++currentRowGroup; - for (uint64_t state = 2; state < nextStateId; ++state) { - smb.newRowGroup(currentRow); - //STORM_PRINT("Loop State: " << state << std::endl) - uint64_t numChoices = pomdp.getNumberOfChoices( - 
pomdp.getStatesWithObservation(refinementComponents->beliefList[refinementComponents->overApproxBeliefStateMap.right.at(state)].observation).front()); - bool isTarget = refinementComponents->beliefIsTarget[refinementComponents->overApproxBeliefStateMap.right.at(state)]; - for (uint64_t action = 0; action < numChoices; ++action) { - if (transitionsStateActionPair.find(std::make_pair(state, action)) == transitionsStateActionPair.end()) { - for (auto const &entry : oldTransitionMatrix.getRow(state, action)) { - smb.addNextValue(currentRow, entry.getColumn(), entry.getValue()); - } - } else { - for (auto const &iter : transitionsStateActionPair[std::make_pair(state, action)]) { - smb.addNextValue(currentRow, iter.first, iter.second); - } - } - ++currentRow; - if (isTarget) { - // If the state is a target, we only have one action, thus we add the target label and stop the iteration - mdpLabeling.addLabelToState("target", state); - break; - } - if (stoppedExplorationStateSet.find(state) != stoppedExplorationStateSet.end()) { - break; - } - } - ++currentRowGroup; - } - storm::storage::sparse::ModelComponents modelComponents(smb.build(), mdpLabeling); - storm::models::sparse::Mdp overApproxMdp(modelComponents); - if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { - auto currentBelief = refinementComponents->beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); - } - } 
- overApproxMdp.addRewardModel("std", mdpRewardModel); - overApproxMdp.restrictRewardModels(std::set({"std"})); + return; } - overApproxMdp.printModelInformationToStream(std::cout); - statistics.overApproximationBuildTime.stop(); - STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); - auto model = std::make_shared>(overApproxMdp); - auto modelPtr = std::static_pointer_cast>(model); - std::string propertyString = computeRewards ? "R" : "P"; - propertyString += min ? "min" : "max"; - propertyString += "=? [F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - auto task = storm::api::createTask(property, false); + overApproximation->finishExploration(); + statistics.overApproximationBuildTime.stop(); + statistics.overApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); + overApproximation->computeValuesOfExploredMdp(min ? 
storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.overApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return refinementComponents; // Return the result from the previous iteration - } - STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << std::endl) - STORM_LOG_ASSERT(res, "Result not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); - auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto overApprox = overApproxResultMap[refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)]; - - //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, - refinementComponents->initialBeliefId, min, computeRewards, maxUaModelSize); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - if (storm::utility::resources::isTerminate() && !underApproxComponents) { - return std::make_unique>( - RefinementComponents{modelPtr, overApprox, refinementComponents->underApproxValue, overApproxResultMap, {}, refinementComponents->beliefList, refinementComponents->beliefGrid, refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, {}, refinementComponents->initialBeliefId}); - } - STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); - - return std::make_shared>( - RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, refinementComponents->beliefList, refinementComponents->beliefGrid, - refinementComponents->beliefIsTarget, 
refinementComponents->overApproxBeliefStateMap, - underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId}); - } - */ } template - std::shared_ptr::ExplorerType> ApproximatePOMDPModelchecker::computeUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr& beliefManager) { + void ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { statistics.underApproximationBuildTime.start(); - storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); - if (computeRewards) { - explorer.startNewExploration(storm::utility::zero()); + if (!underApproximation->hasComputedValues()) { + // Build a new under approximation + if (computeRewards) { + underApproximation->startNewExploration(storm::utility::zero()); + } else { + underApproximation->startNewExploration(storm::utility::one(), storm::utility::zero()); + } } else { - explorer.startNewExploration(storm::utility::one(), storm::utility::zero()); + // Restart the building process + underApproximation->restartExploration(); } - // Expand the beliefs to generate the grid on-the-fly - if (options.explorationThreshold > storm::utility::zero()) { - STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) - } - while (explorer.hasUnexploredState()) { - uint64_t currId = explorer.exploreNextState(); + // Expand the beliefs + while (underApproximation->hasUnexploredState()) { + uint64_t currId = underApproximation->exploreNextState(); uint32_t currObservation = beliefManager->getBeliefObservation(currId); if (targetObservations.count(currObservation) != 0) { - explorer.setCurrentStateIsTarget(); - 
explorer.addSelfloopTransition(); + underApproximation->setCurrentStateIsTarget(); + underApproximation->addSelfloopTransition(); } else { bool stopExploration = false; - if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { - stopExploration = true; - explorer.setCurrentStateIsTruncated(); - } else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) { - stopExploration = true; - explorer.setCurrentStateIsTruncated(); + if (!underApproximation->currentStateHasOldBehavior()) { + if (storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); + } else if (underApproximation->getCurrentNumberOfMdpStates() >= maxStateCount) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); + } } for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { - ValueType truncationProbability = storm::utility::zero(); - ValueType truncationValueBound = storm::utility::zero(); - auto successors = beliefManager->expand(currId, action); - for (auto const& successor : successors) { - bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration); - if (!added) { - STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); - // We did not explore this successor state. Get a bound on the "missing" value - truncationProbability += successor.second; - truncationValueBound += successor.second * (min ? 
explorer.computeUpperValueBoundAtBelief(successor.first) : explorer.computeLowerValueBoundAtBelief(successor.first)); + // Always restore old behavior if available + if (underApproximation->currentStateHasOldBehavior()) { + underApproximation->restoreOldBehaviorAtCurrentState(action); + } else { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successors = beliefManager->expand(currId, action); + for (auto const& successor : successors) { + bool added = underApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first)); + } + } + if (stopExploration) { + if (computeRewards) { + underApproximation->addTransitionsToExtraStates(action, truncationProbability); + } else { + underApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + } } - } - if (stopExploration) { if (computeRewards) { - explorer.addTransitionsToExtraStates(action, truncationProbability); - } else { - explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + underApproximation->computeRewardAtCurrentState(action, truncationValueBound); } } - if (computeRewards) { - // The truncationValueBound will be added on top of the reward introduced by the current belief state. 
- explorer.computeRewardAtCurrentState(action, truncationValueBound); - } } } if (storm::utility::resources::isTerminate()) { @@ -780,25 +484,19 @@ namespace storm { break; } } - statistics.underApproximationStates = explorer.getCurrentNumberOfMdpStates(); + statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.underApproximationBuildTime.stop(); - return nullptr; + return; } - explorer.finishExploration(); + underApproximation->finishExploration(); statistics.underApproximationBuildTime.stop(); statistics.underApproximationCheckTime.start(); - explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); + underApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.underApproximationCheckTime.stop(); - return std::make_shared(std::move(explorer)); - } - - template - void ApproximatePOMDPModelchecker::refineUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { - // TODO } template class ApproximatePOMDPModelchecker>; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 0d59ac31a..7fbd2ab5e 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -18,32 +18,6 @@ namespace storm { namespace modelchecker { typedef boost::bimap bsmap_type; - /** - * Struct containing information which is supposed to be persistent over multiple refinement steps - * - */ - template> - struct RefinementComponents { - std::shared_ptr> overApproxModelPtr; - ValueType overApproxValue; - ValueType underApproxValue; - std::map 
overApproxMap; - std::map underApproxMap; - std::vector> beliefList; - std::vector> beliefGrid; - std::vector beliefIsTarget; - bsmap_type overApproxBeliefStateMap; - bsmap_type underApproxBeliefStateMap; - uint64_t initialBeliefId; - }; - - template> - struct UnderApproxComponents { - ValueType underApproxValue; - std::map underApproxMap; - bsmap_type underApproxBeliefStateMap; - }; - template class ApproximatePOMDPModelchecker { public: @@ -103,121 +77,16 @@ namespace storm { /** * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties */ - std::shared_ptr computeOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, std::vector& observationResolutionVector, std::shared_ptr& beliefManager); - - void refineOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); + void buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); /** * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. 
provides a lower bound for maximizing and an upper bound for minimizing properties */ - std::shared_ptr computeUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr& beliefManager); - - void refineUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); - - -#ifdef REMOVE_THIS - /** - * Helper to compute an underapproximation of the reachability property. - * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. - * - * @param beliefList vector containing already generated beliefs - * @param beliefIsTarget vector containinf for each belief in beliefList true if the belief is a target - * @param targetObservations set of target observations - * @param initialBeliefId Id of the belief corresponding to the POMDP's initial state - * @param min true if minimum value is to be computed - * @param computeReward true if rewards are to be computed - * @param maxModelSize number of states up until which the belief support should be unrolled - * @return struct containing the components generated during the under approximation - */ - std::unique_ptr> computeUnderapproximation(std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - uint64_t initialBeliefId, bool min, bool computeReward, - uint64_t maxModelSize); - std::unique_ptr> computeUnderapproximation(std::shared_ptr>> beliefManager, - std::set const &targetObservations, bool min, bool computeReward, - uint64_t maxModelSize, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds); - - /** - * Constructs the initial belief for the given POMDP - * - * @param pomdp the POMDP - * @param id the id the initial belief is given - * 
@return a belief representing the initial belief - */ - storm::pomdp::Belief - getInitialBelief(uint64_t id); - - - /** - * Subroutine to compute the subsimplex a given belief is contained in and the corresponding lambda values necessary for the Freudenthal triangulation - * - * @param probabilities the probability distribution of the belief - * @param gridResolution the resolution used for the belief - * @param nrStates number of states in the POMDP - * @return a pair containing: 1) the subsimplices 2) the lambda values - */ - std::pair>, std::vector> - computeSubSimplexAndLambdas(std::map &probabilities, uint64_t gridResolution, uint64_t nrStates); + void buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); - - /** - * Helper method to get the probabilities to be in a state with each observation after performing an action - * - * @param belief the belief in which the action is performed - * @param actionIndex the index of the action to be performed - * @return mapping from each observation to the probability to be in a state with that observation after performing the action - */ - std::map computeObservationProbabilitiesAfterAction(storm::pomdp::Belief &belief, - uint64_t actionIndex); - - /** - * Helper method to get the id of the next belief that results from a belief by performing an action and observing an observation. 
- * If the belief does not exist yet, it is created and added to the list of all beliefs - * - * @param beliefList data structure to store all generated beliefs - * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief - * @param targetObservations set of target observations - * @param belief the starting belief - * @param actionIndex the index of the action to be performed - * @param observation the observation after the action was performed - * @return the resulting belief (observation and distribution) - */ - uint64_t getBeliefAfterActionAndObservation(std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - storm::pomdp::Belief &belief, - uint64_t actionIndex, uint32_t observation, uint64_t id); - - /** - * Helper to get the id of a Belief stored in a given vector structure - * - * @param grid the vector on which the lookup is performed - * @param observation the observation of the belief - * @param probabilities the probability distribution over the POMDP states of the Belief - * @return if the belief was found in the vector, the belief's ID, otherwise -1 - */ - uint64_t getBeliefIdInVector(std::vector> const &grid, uint32_t observation, - std::map &probabilities); - - /** - * Helper method to build the transition matrix from a data structure containing transations - * - * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability) - * @return sparseMatrix representing the transitions - */ - storm::storage::SparseMatrix buildTransitionMatrix(std::vector>> &transitions); - - /** - * Get the reward for performing an action in a given belief - * - * @param action the index of the action to be performed - * @param belief the belief in which the action is performed - * @return the reward earned by performing the action in the belief - */ - ValueType getRewardAfterAction(uint64_t 
action, storm::pomdp::Belief const& belief); - ValueType getRewardAfterAction(uint64_t action, std::map const& belief); -#endif //REMOVE_THIS + ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info); + + std::vector getObservationRatings(std::shared_ptr const& overApproximation); struct Statistics { Statistics(); From 6540b486e732a7921d08852ce9f7c3450e944729 Mon Sep 17 00:00:00 2001 From: Matthias Volk Date: Mon, 6 Apr 2020 14:31:47 +0200 Subject: [PATCH 23/40] NotSupportedException when using drn export for symbolic models --- src/storm-cli-utilities/model-handling.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/storm-cli-utilities/model-handling.h b/src/storm-cli-utilities/model-handling.h index 6dad6fbb0..e6508c190 100644 --- a/src/storm-cli-utilities/model-handling.h +++ b/src/storm-cli-utilities/model-handling.h @@ -566,6 +566,10 @@ namespace storm { if (ioSettings.isExportExplicitSet()) { storm::api::exportSparseModelAsDrn(model, ioSettings.getExportExplicitFilename(), input.model ? 
input.model.get().getParameterNames() : std::vector()); } + + if (ioSettings.isExportDdSet()) { + STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Exporting in drdd format is only supported for DDs."); + } if (ioSettings.isExportDotSet()) { storm::api::exportSparseModelAsDot(model, ioSettings.getExportDotFilename(), ioSettings.getExportDotMaxWidth()); @@ -576,6 +580,10 @@ namespace storm { void exportDdModel(std::shared_ptr> const& model, SymbolicInput const& input) { auto ioSettings = storm::settings::getModule(); + if (ioSettings.isExportExplicitSet()) { + STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Exporting in drn format is only supported for sparse models."); + } + if (ioSettings.isExportDdSet()) { storm::api::exportSparseModelAsDrdd(model, ioSettings.getExportDdFilename()); } From c3847d05afd0bce9aae4d80047a1a1543846f263 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 06:37:01 +0200 Subject: [PATCH 24/40] Scaling the rating of an observation with the current resolution. --- .../ApproximatePOMDPModelchecker.cpp | 18 +++++++++++------- .../ApproximatePOMDPModelchecker.h | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 4526607d4..2936d9b40 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -264,21 +264,23 @@ namespace storm { * Here, 0 means a bad approximation and 1 means a good approximation. 
*/ template - typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info) { + typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, uint64_t const& maxResolution) { auto n = storm::utility::convertNumber(info.successorWithObsCount); auto one = storm::utility::one(); - // Create the actual rating for this observation at this choice from the given info + // Create the rating for this observation at this choice from the given info ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1 // Normalize the rating so that it ranges from 0 to 1, where // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated. 
obsChoiceRating = (obsChoiceRating * n - one) / (n - one); + // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined) + obsChoiceRating *= storm::utility::convertNumber(observationResolution) / storm::utility::convertNumber(maxResolution); return obsChoiceRating; } template - std::vector::ValueType> ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation) { + std::vector::ValueType> ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector, uint64_t const& maxResolution) { uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices(); std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); @@ -289,7 +291,7 @@ namespace storm { overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); for (auto const& obsInfo : gatheredSuccessorObservations) { auto const& obs = obsInfo.first; - ValueType obsChoiceRating = rateObservation(obsInfo.second); + ValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution); // The rating of the observation will be the minimum over all choice-based observation ratings resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating); @@ -303,7 +305,9 @@ namespace storm { STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given"); STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero(), "Can not refine with negative aggressiveness."); STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one(), "Refinement with aggressiveness > 1 is invalid."); - + uint64_t maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); + STORM_LOG_INFO("Refining with maximal 
resolution " << maxResolution << "."); + statistics.overApproximationBuildTime.start(); storm::storage::BitVector refinedObservations; if (!refine) { @@ -315,7 +319,7 @@ namespace storm { } } else { // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement. - auto obsRatings = getObservationRatings(overApproximation); + auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, maxResolution); ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end()); // Potentially increase the aggressiveness so that at least one observation actually gets refinement. *refinementAggressiveness = std::max(minRating, *refinementAggressiveness); @@ -356,7 +360,7 @@ namespace storm { overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations); for (auto const& obsInfo : gatheredSuccessorObservations) { if (refinedObservations.get(obsInfo.first)) { - ValueType obsRating = rateObservation(obsInfo.second); + ValueType obsRating = rateObservation(obsInfo.second, observationResolutionVector[obsInfo.first], maxResolution); stateActionRating = std::min(stateActionRating, obsRating); } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 7fbd2ab5e..f895a3138 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -84,9 +84,9 @@ namespace storm { */ void buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); - ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info); + ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info, uint64_t const& observationResolution, 
uint64_t const& maxResolution); - std::vector getObservationRatings(std::shared_ptr const& overApproximation); + std::vector getObservationRatings(std::shared_ptr const& overApproximation, std::vector const& observationResolutionVector, uint64_t const& maxResolution); struct Statistics { Statistics(); From c2837bb749668df1cbadf995062d84520495c9e3 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 12:41:36 +0200 Subject: [PATCH 25/40] ApproximatePOMDPModelchecker: Improved output a bit. --- .../ApproximatePOMDPModelchecker.cpp | 44 ++++++++++++------- .../ApproximatePOMDPModelchecker.h | 2 + 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 2936d9b40..fb95b0838 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -133,6 +133,7 @@ namespace storm { stream << ">="; } stream << statistics.overApproximationStates.get() << std::endl; + stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.get() << std::endl; stream << "# Time spend for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << std::endl; stream << "# Time spend for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << std::endl; } @@ -148,6 +149,7 @@ namespace storm { stream << ">="; } stream << statistics.underApproximationStates.get() << std::endl; + stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.get() << std::endl; stream << "# Time spend for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << std::endl; stream << "# Time spend for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << std::endl; } @@ -231,14 +233,16 @@ 
namespace storm { // ValueType lastMinScore = storm::utility::infinity(); // Start refinement statistics.refinementSteps = 0; - ValueType refinementAggressiveness = storm::utility::zero(); + ValueType refinementAggressiveness = storm::utility::convertNumber(0.0); while (result.diff() > options.refinementPrecision) { if (storm::utility::resources::isTerminate()) { break; } + ++statistics.refinementSteps.get(); + STORM_LOG_INFO("Starting refinement step " << statistics.refinementSteps.get() << ". Current difference between lower and upper bound is " << result.diff() << "."); // Refine over-approximation - refinementAggressiveness *= storm::utility::convertNumber(1.1);; + STORM_LOG_DEBUG("Refining over-approximation with aggressiveness " << refinementAggressiveness << "."); buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, &refinementAggressiveness, observationResolutionVector, overApproxBeliefManager, overApproximation); if (overApproximation->hasComputedValues()) { overApproxValue = overApproximation->getComputedValueAtInitialState(); @@ -246,16 +250,18 @@ namespace storm { break; } - // Refine under-approximation - underApproxSizeThreshold *= storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + refinementAggressiveness)); - underApproxSizeThreshold = std::max(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates()); - buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); - if (underApproximation->hasComputedValues()) { - underApproxValue = underApproximation->getComputedValueAtInitialState(); - } else { - break; + if (result.diff() > options.refinementPrecision) { + // Refine under-approximation + underApproxSizeThreshold *= storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + 
refinementAggressiveness)); + underApproxSizeThreshold = std::max(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates()); + STORM_LOG_DEBUG("Refining under-approximation with size threshold " << underApproxSizeThreshold << "."); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); + if (underApproximation->hasComputedValues()) { + underApproxValue = underApproximation->getComputedValueAtInitialState(); + } else { + break; + } } - ++statistics.refinementSteps.get(); } } @@ -305,8 +311,9 @@ namespace storm { STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given"); STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero(), "Can not refine with negative aggressiveness."); STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one(), "Refinement with aggressiveness > 1 is invalid."); - uint64_t maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); - STORM_LOG_INFO("Refining with maximal resolution " << maxResolution << "."); + + // current maximal resolution (needed for refinement heuristic) + uint64_t oldMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); statistics.overApproximationBuildTime.start(); storm::storage::BitVector refinedObservations; @@ -319,12 +326,12 @@ namespace storm { } } else { // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement. 
- auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, maxResolution); + auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector, oldMaxResolution); ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end()); // Potentially increase the aggressiveness so that at least one observation actually gets refinement. *refinementAggressiveness = std::max(minRating, *refinementAggressiveness); refinedObservations = storm::utility::vector::filter(obsRatings, [&refinementAggressiveness](ValueType const& r) { return r <= *refinementAggressiveness;}); - STORM_PRINT("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); + STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); for (auto const& obs : refinedObservations) { // Heuristically increment the resolution at the refined observations (also based on the refinementAggressiveness) ValueType incrementValue = storm::utility::one() + (*refinementAggressiveness) * storm::utility::convertNumber(observationResolutionVector[obs]); @@ -332,6 +339,7 @@ namespace storm { } overApproximation->restartExploration(); } + statistics.overApproximationMaxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); // Start exploration std::map gatheredSuccessorObservations; // Declare here to avoid reallocations @@ -360,7 +368,7 @@ namespace storm { overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations); for (auto const& obsInfo : gatheredSuccessorObservations) { if (refinedObservations.get(obsInfo.first)) { - ValueType obsRating = rateObservation(obsInfo.second, observationResolutionVector[obsInfo.first], maxResolution); + ValueType obsRating = rateObservation(obsInfo.second, 
observationResolutionVector[obsInfo.first], oldMaxResolution); stateActionRating = std::min(stateActionRating, obsRating); } } @@ -411,7 +419,8 @@ namespace storm { overApproximation->finishExploration(); statistics.overApproximationBuildTime.stop(); - + STORM_LOG_DEBUG("Explored " << statistics.overApproximationStates.get() << " states."); + statistics.overApproximationCheckTime.start(); overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.overApproximationCheckTime.stop(); @@ -421,6 +430,7 @@ namespace storm { void ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { statistics.underApproximationBuildTime.start(); + statistics.underApproximationStateLimit = maxStateCount; if (!underApproximation->hasComputedValues()) { // Build a new under approximation if (computeRewards) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index f895a3138..1d5521b6a 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -96,11 +96,13 @@ namespace storm { bool overApproximationBuildAborted; storm::utility::Stopwatch overApproximationBuildTime; storm::utility::Stopwatch overApproximationCheckTime; + boost::optional overApproximationMaxResolution; boost::optional underApproximationStates; bool underApproximationBuildAborted; storm::utility::Stopwatch underApproximationBuildTime; storm::utility::Stopwatch underApproximationCheckTime; + boost::optional underApproximationStateLimit; bool aborted; }; From 961baa43868315b5c8eaef8c21eeffd962748ffb Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 12:43:33 +0200 Subject: [PATCH 26/40] 
BeliefMdpExplorer: Various bugfixes for exploration restarts. Unexplored (= unreachable) states are now dropped before building the MDP since we do not get a valid MDP otherwise. --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 186 ++++++++++++++------ 1 file changed, 131 insertions(+), 55 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 2a97c5e05..d59b770f0 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -62,33 +62,36 @@ namespace storm { exploredChoiceIndices.clear(); mdpActionRewards.clear(); exploredMdp = nullptr; - currentMdpState = noState(); - + internalAddRowGroupIndex(); // Mark the start of the first row group + // Add some states with special treatment (if requested) if (extraBottomStateValue) { - extraBottomState = getCurrentNumberOfMdpStates(); + currentMdpState = getCurrentNumberOfMdpStates(); + extraBottomState = currentMdpState; mdpStateToBeliefIdMap.push_back(beliefManager->noId()); insertValueHints(extraBottomStateValue.get(), extraBottomStateValue.get()); - internalAddRowGroupIndex(); internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.get(), storm::utility::one()); + internalAddRowGroupIndex(); } else { extraBottomState = boost::none; } if (extraTargetStateValue) { - extraTargetState = getCurrentNumberOfMdpStates(); + currentMdpState = getCurrentNumberOfMdpStates(); + extraTargetState = currentMdpState; mdpStateToBeliefIdMap.push_back(beliefManager->noId()); insertValueHints(extraTargetStateValue.get(), extraTargetStateValue.get()); - internalAddRowGroupIndex(); internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.get(), storm::utility::one()); + internalAddRowGroupIndex(); targetStates.grow(getCurrentNumberOfMdpStates(), false); targetStates.set(extraTargetState.get(), true); } else { extraTargetState = boost::none; } - + currentMdpState = noState(); + // Set up the initial state. 
initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief()); } @@ -101,6 +104,7 @@ namespace storm { */ void restartExploration() { STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status."); + status = Status::Exploring; // We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and Belief states) remain valid. exploredBeliefIds.clear(); exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); @@ -124,6 +128,7 @@ namespace storm { if (extraTargetState) { currentMdpState = extraTargetState.get(); restoreOldBehaviorAtCurrentState(0); + targetStates.set(extraTargetState.get(), true); } currentMdpState = noState(); @@ -138,23 +143,22 @@ namespace storm { BeliefId exploreNextState() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + // Mark the end of the previously explored row group. + if (currentMdpState != noState() && !currentStateHasOldBehavior()) { + internalAddRowGroupIndex(); + } // Pop from the queue. currentMdpState = mdpStatesToExplore.front(); mdpStatesToExplore.pop_front(); - if (!currentStateHasOldBehavior()) { - internalAddRowGroupIndex(); - } return mdpStateToBeliefIdMap[currentMdpState]; } void addTransitionsToExtraStates(uint64_t const& localActionIndex, ValueType const& targetStateValue = storm::utility::zero(), ValueType const& bottomStateValue = storm::utility::zero()) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - // We first insert the entries of the current row in a separate map. 
- // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) - + STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; if (!storm::utility::isZero(bottomStateValue)) { STORM_LOG_ASSERT(extraBottomState.is_initialized(), "Requested a transition to the extra bottom state but there is none."); @@ -168,6 +172,7 @@ namespace storm { void addSelfloopTransition(uint64_t const& localActionIndex = 0, ValueType const& value = storm::utility::one()) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); uint64_t row = getStartOfCurrentRowGroup() + localActionIndex; internalAddTransition(row, getCurrentMdpState(), value); } @@ -182,8 +187,8 @@ namespace storm { */ bool addTransitionToBelief(uint64_t const& localActionIndex, BeliefId const& transitionTarget, ValueType const& value, bool ignoreNewBeliefs) { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - // We first insert the entries of the current row in a separate map. 
- // This is to ensure that entries are sorted in the right way (as required for the transition matrix builder) + STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); + MdpStateType column; if (ignoreNewBeliefs) { column = getExploredMdpState(transitionTarget); @@ -221,6 +226,7 @@ namespace storm { bool currentStateHasOldBehavior() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateHasOldBehavior' called but there is no current state."); return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates(); } @@ -232,6 +238,8 @@ namespace storm { */ void restoreOldBehaviorAtCurrentState(uint64_t const& localActionIndex) { STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any."); + STORM_LOG_ASSERT(localActionIndex < exploredChoiceIndices[currentMdpState + 1] - exploredChoiceIndices[currentMdpState], "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP."); + uint64_t choiceIndex = exploredChoiceIndices[getCurrentMdpState()] + localActionIndex; STORM_LOG_ASSERT(choiceIndex < exploredChoiceIndices[getCurrentMdpState() + 1], "Invalid local action index."); @@ -255,10 +263,27 @@ namespace storm { void finishExploration() { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); + + // Complete the exploration // Finish the last row grouping in case the last explored state was new if (!currentStateHasOldBehavior()) { internalAddRowGroupIndex(); } + 
// Resize state- and choice based vectors to the correct size + targetStates.resize(getCurrentNumberOfMdpStates(), false); + truncatedStates.resize(getCurrentNumberOfMdpStates(), false); + if (!mdpActionRewards.empty()) { + mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); + } + + // We are not exploring anymore + currentMdpState = noState(); + + // If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build). + // We get rid of these before rebuilding the model + if (exploredMdp) { + dropUnexploredStates(); + } // Create the tranistion matrix uint64_t entryCount = 0; @@ -300,50 +325,101 @@ namespace storm { status = Status::ModelFinished; } - void dropUnreachableStates() { - STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status."); - auto reachableStates = storm::utility::graph::getReachableStates(getExploredMdp()->getTransitionMatrix(), - storm::storage::BitVector(getCurrentNumberOfMdpStates(), std::vector{initialMdpState}), - storm::storage::BitVector(getCurrentNumberOfMdpStates(), true), - getExploredMdp()->getStateLabeling().getStates("target")); - auto reachableTransitionMatrix = getExploredMdp()->getTransitionMatrix().getSubmatrix(true, reachableStates, reachableStates); - auto reachableStateLabeling = getExploredMdp()->getStateLabeling().getSubLabeling(reachableStates); - std::vector reachableMdpStateToBeliefIdMap(reachableStates.getNumberOfSetBits()); - std::vector reachableLowerValueBounds(reachableStates.getNumberOfSetBits()); - std::vector reachableUpperValueBounds(reachableStates.getNumberOfSetBits()); - std::vector reachableValues(reachableStates.getNumberOfSetBits()); - std::vector reachableMdpActionRewards; - for (uint64_t state = 0; state < reachableStates.size(); ++state) { - if (reachableStates[state]) { - 
reachableMdpStateToBeliefIdMap.push_back(mdpStateToBeliefIdMap[state]); - reachableLowerValueBounds.push_back(lowerValueBounds[state]); - reachableUpperValueBounds.push_back(upperValueBounds[state]); - reachableValues.push_back(values[state]); - if (getExploredMdp()->hasRewardModel()) { - //TODO FIXME is there some mismatch with the indices here? - for (uint64_t i = 0; i < getExploredMdp()->getTransitionMatrix().getRowGroupSize(state); ++i) { - reachableMdpActionRewards.push_back(getExploredMdp()->getUniqueRewardModel().getStateActionRewardVector()[state + i]); - } + void dropUnexploredStates() { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing exploration not possible if there are still unexplored states."); + + STORM_LOG_ASSERT(exploredMdp, "Method called although no 'old' MDP is available."); + // Find the states (and corresponding choices) that were not explored. + // These correspond to "empty" MDP transitions + storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(), true), relevantMdpChoices(getCurrentNumberOfMdpChoices(), true); + std::vector toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState()); + MdpStateType nextRelevantIndex = 0; + for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) { + uint64_t rowIndex = exploredChoiceIndices[groupIndex]; + // Check first row in group + if (exploredMdpTransitions[rowIndex].empty()) { + relevantMdpChoices.set(rowIndex, false); + relevantMdpStates.set(groupIndex, false); + } else { + toRelevantStateIndexMap[groupIndex] = nextRelevantIndex; + ++nextRelevantIndex; + } + uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1]; + // process remaining rows in group + for (++rowIndex; rowIndex < groupEnd; ++rowIndex) { + // Assert that all actions at the current state were consistently explored or unexplored. 
+ STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex), "Actions at 'old' MDP state " << groupIndex << " were only partly explored."); + if (exploredMdpTransitions[rowIndex].empty()) { + relevantMdpChoices.set(rowIndex, false); } } - //TODO drop BeliefIds from exploredBeliefIDs? } - std::unordered_map> mdpRewardModels; - if (!reachableMdpActionRewards.empty()) { - //reachableMdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero()); - mdpRewardModels.emplace("default", - storm::models::sparse::StandardRewardModel(boost::optional>(), std::move(reachableMdpActionRewards))); + + if (relevantMdpStates.full()) { + // All states are relevant so nothing to do + return; } - storm::storage::sparse::ModelComponents modelComponents(std::move(reachableTransitionMatrix), std::move(reachableStateLabeling), - std::move(mdpRewardModels)); - exploredMdp = std::make_shared>(std::move(modelComponents)); - - std::map reachableBeliefIdToMdpStateMap; - for (MdpStateType state = 0; state < reachableMdpStateToBeliefIdMap.size(); ++state) { - reachableBeliefIdToMdpStateMap[reachableMdpStateToBeliefIdMap[state]] = state; + + // Translate various components to the "new" MDP state set + storm::utility::vector::filterVectorInPlace(mdpStateToBeliefIdMap, relevantMdpStates); + { // beliefIdToMdpStateMap + for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) { + if (relevantMdpStates.get(belIdToMdpStateIt->second)) { + // Keep current entry and move on to the next one. + ++belIdToMdpStateIt; + } else { + STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first), "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId"); + // Delete current entry and move on to the next one. + // This works because std::map::erase does not invalidate other iterators within the map! 
+ beliefIdToMdpStateMap.erase(belIdToMdpStateIt++); + } + } + } + { // exploredMdpTransitions + storm::utility::vector::filterVectorInPlace(exploredMdpTransitions, relevantMdpChoices); + // Adjust column indices. Unfortunately, the fastest way seems to be to "rebuild" the map + // It might payoff to do this when building the matrix. + for (auto& transitions : exploredMdpTransitions) { + std::map newTransitions; + for (auto const& entry : transitions) { + STORM_LOG_ASSERT(relevantMdpStates.get(entry.first), "Relevant state has transition to irrelevant state."); + newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second); + } + transitions = std::move(newTransitions); + } } - mdpStateToBeliefIdMap = reachableMdpStateToBeliefIdMap; - beliefIdToMdpStateMap = reachableBeliefIdToMdpStateMap; + { // exploredChoiceIndices + MdpStateType newState = 0; + assert(exploredChoiceIndices[0] == 0u); + // Loop invariant: all indices up to exploredChoiceIndices[newState] consider the new row indices and all other entries are not touched. 
+ for (auto const& oldState : relevantMdpStates) { + if (oldState != newState) { + assert(oldState > newState); + uint64_t groupSize = exploredChoiceIndices[oldState + 1] - exploredChoiceIndices[oldState]; + exploredChoiceIndices[newState + 1] = exploredChoiceIndices[newState] + groupSize; + } + ++newState; + } + exploredChoiceIndices.resize(newState + 1); + } + if (!mdpActionRewards.empty()) { + storm::utility::vector::filterVectorInPlace(mdpActionRewards, relevantMdpChoices); + } + if (extraBottomState) { + extraBottomState = toRelevantStateIndexMap[extraBottomState.get()]; + } + if (extraTargetState) { + extraTargetState = toRelevantStateIndexMap[extraTargetState.get()]; + } + targetStates = targetStates % relevantMdpStates; + truncatedStates = truncatedStates % relevantMdpStates; + initialMdpState = toRelevantStateIndexMap[initialMdpState]; + + storm::utility::vector::filterVectorInPlace(lowerValueBounds, relevantMdpStates); + storm::utility::vector::filterVectorInPlace(upperValueBounds, relevantMdpStates); + storm::utility::vector::filterVectorInPlace(values, relevantMdpStates); + } std::shared_ptr> getExploredMdp() const { @@ -364,7 +440,7 @@ namespace storm { MdpStateType getStartOfCurrentRowGroup() const { STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); - return exploredChoiceIndices.back(); + return exploredChoiceIndices[getCurrentMdpState()]; } ValueType getLowerValueBoundAtCurrentState() const { From 34d6ac9fe1f0e06ab209a51d91ba1adae0aaa3b0 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 12:44:14 +0200 Subject: [PATCH 27/40] Fixed computing a state limit for the under-approximation. 
--- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index fb95b0838..bea230599 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -252,7 +252,7 @@ namespace storm { if (result.diff() > options.refinementPrecision) { // Refine under-approximation - underApproxSizeThreshold *= storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + refinementAggressiveness)); + underApproxSizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + refinementAggressiveness)); underApproxSizeThreshold = std::max(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates()); STORM_LOG_DEBUG("Refining under-approximation with size threshold " << underApproxSizeThreshold << "."); buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); From 5cd4281133b986360c7f5583516220c2ab117c3a Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 12:50:12 +0200 Subject: [PATCH 28/40] Further output improvements. 
--- src/storm-pomdp/builder/BeliefMdpExplorer.h | 4 +++- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index d59b770f0..c8f1775ed 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -278,7 +278,7 @@ namespace storm { // We are not exploring anymore currentMdpState = noState(); - + // If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build). // We get rid of these before rebuilding the model if (exploredMdp) { @@ -323,6 +323,8 @@ namespace storm { storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); exploredMdp = std::make_shared>(std::move(modelComponents)); status = Status::ModelFinished; + STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << "of which were flagged as truncated)."); + } void dropUnexploredStates() { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index bea230599..eb116fc7b 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -419,7 +419,6 @@ namespace storm { overApproximation->finishExploration(); statistics.overApproximationBuildTime.stop(); - STORM_LOG_DEBUG("Explored " << statistics.overApproximationStates.get() << " states."); statistics.overApproximationCheckTime.start(); overApproximation->computeValuesOfExploredMdp(min ? 
storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); From 26864067cf7930078f54eed972096f7db5bd20b8 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 13:04:38 +0200 Subject: [PATCH 29/40] BeliefManager: Made several methods private to hide the actual BeliefType. --- src/storm-pomdp/storage/BeliefManager.h | 164 +++++++++++++----------- 1 file changed, 86 insertions(+), 78 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 8f0dcd225..0731aef35 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -44,6 +44,92 @@ namespace storm { } }; + BeliefId noId() const { + return std::numeric_limits::max(); + } + + bool isEqual(BeliefId const& first, BeliefId const& second) const { + return isEqual(getBelief(first), getBelief(second)); + } + + std::string toString(BeliefId const& beliefId) const { + return toString(getBelief(beliefId)); + } + + + std::string toString(Triangulation const& t) const { + std::stringstream str; + str << "(\n"; + for (uint64_t i = 0; i < t.size(); ++i) { + str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n"; + } + str <<")\n"; + return str.str(); + } + + template + ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) { + ValueType result = storm::utility::zero(); + for (auto const& entry : getBelief(beliefId)) { + result += storm::utility::convertNumber(entry.second) * storm::utility::convertNumber(summands.at(entry.first)); + } + return result; + } + + + BeliefId const& getInitialBelief() const { + return initialBeliefId; + } + + ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const { + auto const& belief = getBelief(beliefId); + STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified."); + auto result = 
storm::utility::zero(); + auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); + for (auto const &entry : belief) { + uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex; + STORM_LOG_ASSERT(choiceIndex < choiceIndices[entry.first + 1], "Invalid local action index."); + STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index."); + result += entry.second * pomdpActionRewardVector[choiceIndex]; + } + return result; + } + + uint32_t getBeliefObservation(BeliefId beliefId) { + return getBeliefObservation(getBelief(beliefId)); + } + + uint64_t getBeliefNumberOfChoices(BeliefId beliefId) { + auto belief = getBelief(beliefId); + return pomdp.getNumberOfChoices(belief.begin()->first); + } + + Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) { + return triangulateBelief(getBelief(beliefId), resolution); + } + + template + void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { + auto insertionRes = distr.emplace(state, value); + if (!insertionRes.second) { + insertionRes.first->second += value; + } + } + + BeliefId getNumberOfBeliefIds() const { + return beliefs.size(); + } + + std::map expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { + return expandInternal(beliefId, actionIndex, observationResolutions); + } + + std::map expand(BeliefId const& beliefId, uint64_t actionIndex) { + return expandInternal(beliefId, actionIndex); + } + + private: + BeliefType const& getBelief(BeliefId const& id) const { STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existend belief."); STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range."); @@ -56,10 +142,6 @@ namespace storm { return idIt->second; } - BeliefId noId() const { - return std::numeric_limits::max(); - } - std::string toString(BeliefType const& belief) const { std::stringstream str; str << 
"{ "; @@ -76,16 +158,6 @@ namespace storm { return str.str(); } - std::string toString(Triangulation const& t) const { - std::stringstream str; - str << "(\n"; - for (uint64_t i = 0; i < t.size(); ++i) { - str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n"; - } - str <<")\n"; - return str.str(); - } - bool isEqual(BeliefType const& first, BeliefType const& second) const { if (first.size() != second.size()) { return false; @@ -186,49 +258,11 @@ namespace storm { return true; } - template - ValueType getWeightedSum(BeliefId const& beliefId, SummandsType const& summands) { - ValueType result = storm::utility::zero(); - for (auto const& entry : getBelief(beliefId)) { - result += storm::utility::convertNumber(entry.second) * storm::utility::convertNumber(summands.at(entry.first)); - } - return result; - } - - - BeliefId const& getInitialBelief() const { - return initialBeliefId; - } - - ValueType getBeliefActionReward(BeliefId const& beliefId, uint64_t const& localActionIndex) const { - auto const& belief = getBelief(beliefId); - STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified."); - auto result = storm::utility::zero(); - auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices(); - for (auto const &entry : belief) { - uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex; - STORM_LOG_ASSERT(choiceIndex < choiceIndices[entry.first + 1], "Invalid local action index."); - STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index."); - result += entry.second * pomdpActionRewardVector[choiceIndex]; - } - return result; - } - uint32_t getBeliefObservation(BeliefType belief) { STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief."); return pomdp.getObservation(belief.begin()->first); } - uint32_t getBeliefObservation(BeliefId beliefId) { - return getBeliefObservation(getBelief(beliefId)); - } - - uint64_t 
getBeliefNumberOfChoices(BeliefId beliefId) { - auto belief = getBelief(beliefId); - return pomdp.getNumberOfChoices(belief.begin()->first); - } - - Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { //TODO this can also be simplified using the sparse vector interpretation //TODO Enable chaching for this method? @@ -307,22 +341,6 @@ namespace storm { return result; } - Triangulation triangulateBelief(BeliefId beliefId, uint64_t resolution) { - return triangulateBelief(getBelief(beliefId), resolution); - } - - template - void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { - auto insertionRes = distr.emplace(state, value); - if (!insertionRes.second) { - insertionRes.first->second += value; - } - } - - BeliefId getNumberOfBeliefIds() const { - return beliefs.size(); - } - std::map expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { std::map destinations; // TODO: Output as vector? 
@@ -369,16 +387,6 @@ namespace storm { } - std::map expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { - return expandInternal(beliefId, actionIndex, observationResolutions); - } - - std::map expand(BeliefId const& beliefId, uint64_t actionIndex) { - return expandInternal(beliefId, actionIndex); - } - - private: - BeliefId computeInitialBelief() { STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, "POMDP contains more than one initial state"); From eca4dab6c069642731639aea3a0b1dcb0cb8736f Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 13:43:49 +0200 Subject: [PATCH 30/40] Beliefmanager: expanding a belief now returns a vector instead of a map --- src/storm-pomdp/storage/BeliefManager.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 0731aef35..b74390a79 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -120,11 +120,11 @@ namespace storm { return beliefs.size(); } - std::map expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { + std::vector> expandAndTriangulate(BeliefId const& beliefId, uint64_t actionIndex, std::vector const& observationResolutions) { return expandInternal(beliefId, actionIndex, observationResolutions); } - std::map expand(BeliefId const& beliefId, uint64_t actionIndex) { + std::vector> expand(BeliefId const& beliefId, uint64_t actionIndex) { return expandInternal(beliefId, actionIndex); } @@ -341,8 +341,8 @@ namespace storm { return result; } - std::map expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { - std::map destinations; + std::vector> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& 
observationTriangulationResolutions = boost::none) { + std::vector> destinations; // TODO: Output as vector? BeliefType belief = getBelief(beliefId); @@ -373,13 +373,15 @@ namespace storm { } STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief."); + // Insert the destination. We know that destinations have to be disjoined since they have different observations if (observationTriangulationResolutions) { Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.get()[successor.first]); for (size_t j = 0; j < triangulation.size(); ++j) { - addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second); + // Here we additionally assume that triangulation.gridPoints does not contain the same point multiple times + destinations.emplace_back(triangulation.gridPoints[j], triangulation.weights[j] * successor.second); } } else { - addToDistribution(destinations, getOrAddBeliefId(successorBelief), successor.second); + destinations.emplace_back(getOrAddBeliefId(successorBelief), successor.second); } } From 937659f3565f51c9b0850a2d3b7452fb86cbc23e Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Tue, 7 Apr 2020 15:32:08 +0200 Subject: [PATCH 31/40] First improvement step for Freudenthal triangulation --- src/storm-pomdp/storage/BeliefManager.h | 130 +++++++++++++----------- 1 file changed, 73 insertions(+), 57 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index b74390a79..7e37b9c16 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -263,77 +263,93 @@ namespace storm { return pomdp.getObservation(belief.begin()->first); } + struct FreudenthalData { + FreudenthalData(StateType const& pomdpState, StateType const& dimension, BeliefValueType const& x) : pomdpState(pomdpState), dimension(dimension), value(storm::utility::floor(x)), diff(x-value) { }; + StateType 
pomdpState; + StateType dimension; // i + BeliefValueType value; // v[i] in the Lovejoy paper + BeliefValueType diff; // d[i] in the Lovejoy paper + }; + struct FreudenthalDataComparator { + bool operator()(FreudenthalData const& first, FreudenthalData const& second) const { + if (first.diff != second.diff) { + return first.diff > second.diff; + } else { + return first.dimension < second.dimension; + } + } + }; + Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { - //TODO this can also be simplified using the sparse vector interpretation //TODO Enable chaching for this method? STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); - auto nrStates = pomdp.getNumberOfStates(); + auto convResolution = storm::utility::convertNumber(resolution); // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) // Variable names are based on the paper - // TODO avoid reallocations for these vectors - std::vector x(nrStates); - std::vector v(nrStates); - std::vector d(nrStates); - auto convResolution = storm::utility::convertNumber(resolution); - - for (size_t i = 0; i < nrStates; ++i) { - for (auto const &probEntry : belief) { - if (probEntry.first >= i) { - x[i] += convResolution * probEntry.second; - } - } - v[i] = storm::utility::floor(x[i]); - d[i] = x[i] - v[i]; + // However, we speed this up a little by exploiting that belief states usually have sparse support. + // TODO: for the sorting, it probably suffices to have a map from diffs to dimensions. 
The other Freudenthaldata could then also be stored in vectors, which would be a bit more like the original algorithm + + // Initialize some data + std::vector::iterator> dataIterators; + dataIterators.reserve(belief.size()); + // Initialize first row of 'qs' matrix + std::vector qsRow; + qsRow.reserve(dataIterators.size()); + std::set freudenthalData; + BeliefValueType x = convResolution; + for (auto const& entry : belief) { + auto insertionIt = freudenthalData.emplace(entry.first, dataIterators.size(), x).first; + dataIterators.push_back(insertionIt); + qsRow.push_back(dataIterators.back()->value); + x -= entry.second * convResolution; } - - auto p = storm::utility::vector::getSortedIndices(d); - - std::vector> qs(nrStates, std::vector(nrStates)); - for (size_t i = 0; i < nrStates; ++i) { - if (i == 0) { - for (size_t j = 0; j < nrStates; ++j) { - qs[i][j] = v[j]; - } - } else { - for (size_t j = 0; j < nrStates; ++j) { - if (j == p[i - 1]) { - qs[i][j] = qs[i - 1][j] + storm::utility::one(); - } else { - qs[i][j] = qs[i - 1][j]; - } + qsRow.push_back(storm::utility::zero()); + assert(!freudenthalData.empty()); + + Triangulation result; + result.weights.reserve(freudenthalData.size()); + result.gridPoints.reserve(freudenthalData.size()); + + // Insert first grid point + // TODO: this special treatment is actually not necessary. + BeliefValueType firstWeight = storm::utility::one() - freudenthalData.begin()->diff + freudenthalData.rbegin()->diff; + if (!cc.isZero(firstWeight)) { + result.weights.push_back(firstWeight); + BeliefType gridPoint; + for (StateType j = 0; j < dataIterators.size(); ++j) { + BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; + if (!cc.isZero(gridPointEntry)) { + gridPoint[dataIterators[j]->pomdpState] = gridPointEntry / convResolution; } } + result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); } - Triangulation result; - // The first weight is 1-sum(other weights). 
We therefore process the js in reverse order - BeliefValueType firstWeight = storm::utility::one(); - for (size_t j = nrStates; j > 0;) { - --j; - // First create the weights. The weights vector will be reversed at the end. - ValueType weight; - if (j == 0) { - weight = firstWeight; - } else { - weight = d[p[j - 1]] - d[p[j]]; - firstWeight -= weight; - } - if (!cc.isZero(weight)) { - result.weights.push_back(weight); - BeliefType gridPoint; - auto const& qsj = qs[j]; - for (size_t i = 0; i < nrStates - 1; ++i) { - BeliefValueType gridPointEntry = qsj[i] - qsj[i + 1]; - if (!cc.isZero(gridPointEntry)) { - gridPoint[i] = gridPointEntry / convResolution; + if (freudenthalData.size() > 1) { + // Insert remaining grid points + auto currentSortedEntry = freudenthalData.begin(); + auto previousSortedEntry = currentSortedEntry++; + for (StateType i = 1; i < dataIterators.size(); ++i) { + // 'compute' the next row of the qs matrix + qsRow[previousSortedEntry->dimension] += storm::utility::one(); + + BeliefValueType weight = previousSortedEntry->diff - currentSortedEntry->diff; + if (!cc.isZero(weight)) { + result.weights.push_back(weight); + + BeliefType gridPoint; + for (StateType j = 0; j < dataIterators.size(); ++j) { + BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; + if (!cc.isZero(gridPointEntry)) { + gridPoint[dataIterators[j]->pomdpState] = gridPointEntry / convResolution; + } } + result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); } - if (!cc.isZero(qsj[nrStates - 1])) { - gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution; - } - result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); + ++previousSortedEntry; + ++currentSortedEntry; } } From 2f020ce6860af6cb45a460aca32e1d52f910df74 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 08:12:32 +0200 Subject: [PATCH 32/40] BeliefManager: Making Freudenthal happy (and fast) --- src/storm-pomdp/storage/BeliefManager.h | 122 ++++++++++-------------- 1 file changed, 53 
insertions(+), 69 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 7e37b9c16..95c596005 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -263,96 +263,80 @@ namespace storm { return pomdp.getObservation(belief.begin()->first); } - struct FreudenthalData { - FreudenthalData(StateType const& pomdpState, StateType const& dimension, BeliefValueType const& x) : pomdpState(pomdpState), dimension(dimension), value(storm::utility::floor(x)), diff(x-value) { }; - StateType pomdpState; + struct FreudenthalDiff { + FreudenthalDiff(StateType const& dimension, BeliefValueType&& diff) : dimension(dimension), diff(std::move(diff)) { }; StateType dimension; // i - BeliefValueType value; // v[i] in the Lovejoy paper - BeliefValueType diff; // d[i] in the Lovejoy paper - }; - struct FreudenthalDataComparator { - bool operator()(FreudenthalData const& first, FreudenthalData const& second) const { - if (first.diff != second.diff) { - return first.diff > second.diff; + BeliefValueType diff; // d[i] + bool operator>(FreudenthalDiff const& other) const { + if (diff != other.diff) { + return diff > other.diff; } else { - return first.dimension < second.dimension; + return dimension < other.dimension; } } }; Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { - //TODO Enable chaching for this method? STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); - - auto convResolution = storm::utility::convertNumber(resolution); - - // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) - // Variable names are based on the paper - // However, we speed this up a little by exploiting that belief states usually have sparse support. - // TODO: for the sorting, it probably suffices to have a map from diffs to dimensions. 
The other Freudenthaldata could then also be stored in vectors, which would be a bit more like the original algorithm - - // Initialize some data - std::vector::iterator> dataIterators; - dataIterators.reserve(belief.size()); - // Initialize first row of 'qs' matrix - std::vector qsRow; - qsRow.reserve(dataIterators.size()); - std::set freudenthalData; - BeliefValueType x = convResolution; - for (auto const& entry : belief) { - auto insertionIt = freudenthalData.emplace(entry.first, dataIterators.size(), x).first; - dataIterators.push_back(insertionIt); - qsRow.push_back(dataIterators.back()->value); - x -= entry.second * convResolution; - } - qsRow.push_back(storm::utility::zero()); - assert(!freudenthalData.empty()); - + StateType numEntries = belief.size(); Triangulation result; - result.weights.reserve(freudenthalData.size()); - result.gridPoints.reserve(freudenthalData.size()); - - // Insert first grid point - // TODO: this special treatment is actually not necessary. - BeliefValueType firstWeight = storm::utility::one() - freudenthalData.begin()->diff + freudenthalData.rbegin()->diff; - if (!cc.isZero(firstWeight)) { - result.weights.push_back(firstWeight); - BeliefType gridPoint; - for (StateType j = 0; j < dataIterators.size(); ++j) { - BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; - if (!cc.isZero(gridPointEntry)) { - gridPoint[dataIterators[j]->pomdpState] = gridPointEntry / convResolution; - } + + // Quickly triangulate Dirac beliefs + if (numEntries == 1u) { + result.weights.push_back(storm::utility::one()); + result.gridPoints.push_back(getOrAddBeliefId(belief)); + } else { + + auto convResolution = storm::utility::convertNumber(resolution); + // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) + // Variable names are mostly based on the paper + // However, we speed this up a little by exploiting that belief states usually have sparse support (i.e. numEntries is much smaller than pomdp.getNumberOfStates()). 
+ // Initialize diffs and the first row of the 'qs' matrix (aka v) + std::set> sorted_diffs; // d (and p?) in the paper + std::vector qsRow; // Row of the 'qs' matrix from the paper (initially corresponds to v + qsRow.reserve(numEntries); + std::vector toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices + toOriginalIndicesMap.reserve(numEntries); + BeliefValueType x = convResolution; + for (auto const& entry : belief) { + qsRow.push_back(storm::utility::floor(x)); // v + sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v + toOriginalIndicesMap.push_back(entry.first); + x -= entry.second * convResolution; } - result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); - } - - if (freudenthalData.size() > 1) { - // Insert remaining grid points - auto currentSortedEntry = freudenthalData.begin(); - auto previousSortedEntry = currentSortedEntry++; - for (StateType i = 1; i < dataIterators.size(); ++i) { - // 'compute' the next row of the qs matrix - qsRow[previousSortedEntry->dimension] += storm::utility::one(); - - BeliefValueType weight = previousSortedEntry->diff - currentSortedEntry->diff; + // Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler + qsRow.push_back(storm::utility::zero()); + + result.weights.reserve(numEntries); + result.gridPoints.reserve(numEntries); + auto currentSortedDiff = sorted_diffs.begin(); + auto previousSortedDiff = sorted_diffs.end(); + --previousSortedDiff; + for (StateType i = 0; i < numEntries; ++i) { + // Compute the weight for the grid points + BeliefValueType weight = previousSortedDiff->diff - currentSortedDiff->diff; + if (i == 0) { + // The first weight is a bit different + weight += storm::utility::one(); + } else { + // 'compute' the next row of the qs matrix + qsRow[previousSortedDiff->dimension] += storm::utility::one(); + } if (!cc.isZero(weight)) { result.weights.push_back(weight); - + // Compute the grid point BeliefType gridPoint; - for 
(StateType j = 0; j < dataIterators.size(); ++j) { + for (StateType j = 0; j < numEntries; ++j) { BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1]; if (!cc.isZero(gridPointEntry)) { - gridPoint[dataIterators[j]->pomdpState] = gridPointEntry / convResolution; + gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / convResolution; } } result.gridPoints.push_back(getOrAddBeliefId(gridPoint)); } - ++previousSortedEntry; - ++currentSortedEntry; + previousSortedDiff = currentSortedDiff++; } } - STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation: " << toString(result)); return result; } From fcee1d05fabe15de82f6213a3bd91acf25492ea1 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 10:09:45 +0200 Subject: [PATCH 33/40] Fixed an issue with dropping unexplored states. --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index c8f1775ed..fd2b54862 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -323,7 +323,7 @@ namespace storm { storm::storage::sparse::ModelComponents modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels)); exploredMdp = std::make_shared>(std::move(modelComponents)); status = Status::ModelFinished; - STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << "of which were flagged as truncated)."); + STORM_LOG_DEBUG("Explored Mdp with " << exploredMdp->getNumberOfStates() << " states (" << truncatedStates.getNumberOfSetBits() << " of which were flagged as truncated)."); } @@ -368,7 +368,8 @@ namespace storm { { // beliefIdToMdpStateMap for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) { if 
(relevantMdpStates.get(belIdToMdpStateIt->second)) { - // Keep current entry and move on to the next one. + // Translate current entry and move on to the next one. + belIdToMdpStateIt->second = toRelevantStateIndexMap[belIdToMdpStateIt->second]; ++belIdToMdpStateIt; } else { STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first), "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId"); @@ -619,7 +620,7 @@ namespace storm { return findRes->second; } } - // At this poind we need to add a new MDP state + // At this point we need to add a new MDP state MdpStateType result = getCurrentNumberOfMdpStates(); assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size()); mdpStateToBeliefIdMap.push_back(beliefId); From 26a0544e4ba075bfbc773f75ee5b1489f41ada1f Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 10:10:34 +0200 Subject: [PATCH 34/40] BeliefManager: Use flat_maps for beliefs and hash_maps for belief storage. --- src/storm-pomdp/storage/BeliefManager.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/storm-pomdp/storage/BeliefManager.h b/src/storm-pomdp/storage/BeliefManager.h index 95c596005..25ec3a3d2 100644 --- a/src/storm-pomdp/storage/BeliefManager.h +++ b/src/storm-pomdp/storage/BeliefManager.h @@ -1,9 +1,9 @@ #pragma once -#include +#include #include -//#include - +#include +#include "storm/adapters/RationalNumberAdapter.h" #include "storm/utility/macros.h" #include "storm/exceptions/UnexpectedException.h" @@ -15,8 +15,7 @@ namespace storm { public: typedef typename PomdpType::ValueType ValueType; - //typedef boost::container::flat_map BeliefType - typedef std::map BeliefType; + typedef boost::container::flat_map BeliefType; // iterating over this shall be ordered (for correct hash computation) typedef uint64_t BeliefId; BeliefManager(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { @@ -343,7
+342,6 @@ namespace storm { std::vector> expandInternal(BeliefId const& beliefId, uint64_t actionIndex, boost::optional> const& observationTriangulationResolutions = boost::none) { std::vector> destinations; - // TODO: Output as vector? BeliefType belief = getBelief(beliefId); @@ -411,11 +409,23 @@ namespace storm { return insertioRes.first->second; } + struct BeliefHash { + std::size_t operator()(const BeliefType& belief) const { + std::size_t seed = 0; + // Assumes that beliefs are ordered + for (auto const& entry : belief) { + boost::hash_combine(seed, entry.first); + boost::hash_combine(seed, entry.second); + } + return seed; + } + }; + PomdpType const& pomdp; std::vector pomdpActionRewardVector; std::vector beliefs; - std::map beliefToIdMap; + std::unordered_map beliefToIdMap; BeliefId initialBeliefId; storm::utility::ConstantsComparator cc; From 03889958dab727f7a50f0f2f703a6a0c4fb055ec Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 11:29:37 +0200 Subject: [PATCH 35/40] Added a switch to control the size of the under-approximation via command line. 
--- .../modules/GridApproximationSettings.cpp | 10 ++++++++++ .../settings/modules/GridApproximationSettings.h | 3 ++- src/storm-pomdp-cli/storm-pomdp.cpp | 1 + .../ApproximatePOMDPModelchecker.cpp | 16 ++++++++++++++-- .../modelchecker/ApproximatePOMDPModelchecker.h | 1 + 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp index 8006b3851..6b5b17677 100644 --- a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp +++ b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.cpp @@ -19,6 +19,7 @@ namespace storm { const std::string limitBeliefExplorationOption = "limit-exploration"; const std::string numericPrecisionOption = "numeric-precision"; const std::string cacheSimplicesOption = "cache-simplices"; + const std::string unfoldBeliefMdpOption = "unfold-belief-mdp"; GridApproximationSettings::GridApproximationSettings() : ModuleSettings(moduleName) { @@ -35,6 +36,7 @@ namespace storm { this->addOption(storm::settings::OptionBuilder(moduleName, cacheSimplicesOption, false,"Enables caching of simplices which requires more memory but can be faster.").build()); + this->addOption(storm::settings::OptionBuilder(moduleName, unfoldBeliefMdpOption, false,"Sets the (initial-) size threshold of the unfolded belief MDP (higher means more precise results, 0 means automatic choice)").addArgument(storm::settings::ArgumentBuilder::createUnsignedIntegerArgument("value","the maximal number of states").setDefaultValueUnsignedInteger(0).build()).build()); } bool GridApproximationSettings::isRefineSet() const { @@ -65,6 +67,14 @@ namespace storm { return this->getOption(cacheSimplicesOption).getHasOptionBeenSet(); } + bool GridApproximationSettings::isUnfoldBeliefMdpSizeThresholdSet() const { + return this->getOption(unfoldBeliefMdpOption).getHasOptionBeenSet(); + } + + uint64_t 
GridApproximationSettings::getUnfoldBeliefMdpSizeThreshold() const { + return this->getOption(unfoldBeliefMdpOption).getArgumentByName("value").getValueAsUnsignedInteger(); + } + } // namespace modules } // namespace settings } // namespace storm diff --git a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h index a01fdbd77..88e484128 100644 --- a/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h +++ b/src/storm-pomdp-cli/settings/modules/GridApproximationSettings.h @@ -27,7 +27,8 @@ namespace storm { bool isNumericPrecisionSetFromDefault() const; double getNumericPrecision() const; bool isCacheSimplicesSet() const; - + bool isUnfoldBeliefMdpSizeThresholdSet() const; + uint64_t getUnfoldBeliefMdpSizeThreshold() const; // The name of the module. static const std::string moduleName; diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 9b3026832..181c5c31f 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -109,6 +109,7 @@ namespace storm { options.refinementPrecision = storm::utility::convertNumber(gridSettings.getRefinementPrecision()); options.numericPrecision = storm::utility::convertNumber(gridSettings.getNumericPrecision()); options.cacheSubsimplices = gridSettings.isCacheSimplicesSet(); + options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold(); if (storm::NumberTraits::IsExact) { if (gridSettings.isNumericPrecisionSetFromDefault()) { STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmethic is used."); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index eb116fc7b..070279ac4 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ 
b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -35,6 +35,7 @@ namespace storm { refinementPrecision = storm::utility::convertNumber(1e-4); numericPrecision = storm::NumberTraits::IsExact ? storm::utility::zero() : storm::utility::convertNumber(1e-9); cacheSubsimplices = false; + beliefMdpSizeThreshold = 0ull; } template @@ -180,7 +181,7 @@ namespace storm { approx->getExploredMdp()->printModelInformationToStream(std::cout); ValueType& resultValue = min ? result.lowerBound : result.upperBound; resultValue = approx->getComputedValueAtInitialState(); - underApproxSizeThreshold = approx->getExploredMdp()->getNumberOfStates(); + underApproxSizeThreshold = std::max(approx->getExploredMdp()->getNumberOfStates(), underApproxSizeThreshold); } } { // Underapproximation (Uses a fresh Belief manager) @@ -189,6 +190,12 @@ namespace storm { manager->setRewardModel(rewardModelName); } auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); + if (options.beliefMdpSizeThreshold) { + underApproxSizeThreshold = options.beliefMdpSizeThreshold.get(); + } + if (underApproxSizeThreshold == 0) { + underApproxSizeThreshold = pomdp.getNumberOfStates() * pomdp.getMaxNrStatesWithSameObservation(); // Heuristically select this (only relevant if the over-approx could not be build) + } buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx); if (approx->hasComputedValues()) { STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); @@ -221,7 +228,12 @@ namespace storm { overApproxValue = overApproximation->getComputedValueAtInitialState(); // UnderApproximation - uint64_t underApproxSizeThreshold = std::max(overApproximation->getExploredMdp()->getNumberOfStates(), 10); + uint64_t underApproxSizeThreshold; + if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0ull) { + underApproxSizeThreshold = options.beliefMdpSizeThreshold.get(); + } 
else { + underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates(); + } auto underApproximation = std::make_shared(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); if (!underApproximation->hasComputedValues()) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 1d5521b6a..823eebf60 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -34,6 +34,7 @@ namespace storm { ValueType refinementPrecision; /// Used to decide when the refinement should terminate ValueType numericPrecision; /// Used to decide whether two values are equal bool cacheSubsimplices; /// Enables caching of subsimplices + boost::optional beliefMdpSizeThreshold; /// Sets the (initial) size of the unfolded belief MDP. 0 means auto selection. }; struct Result { From f4f9376c966489ce0ba288235c54360883590138 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 11:30:19 +0200 Subject: [PATCH 36/40] Vector: Added a method for element-wise comparison of two vectors. --- src/storm/utility/vector.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/storm/utility/vector.h b/src/storm/utility/vector.h index dd562a8e1..af6098038 100644 --- a/src/storm/utility/vector.h +++ b/src/storm/utility/vector.h @@ -142,6 +142,12 @@ namespace storm { return true; } + template + bool compareElementWise(std::vector const& left, std::vector const& right, Comparator comp = std::less()) { + STORM_LOG_ASSERT(left.size() == right.size(), "Expected that vectors for comparison have equal size"); + return std::equal(left.begin(), left.end(), right.begin(), comp); + } + /*! 
* Selects the elements from a vector at the specified positions and writes them consecutively into another vector. * @param vector The vector into which the selected elements are to be written. From 3c5df045c1138457b5a98f49ec59a245cb0f7994 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 12:18:32 +0200 Subject: [PATCH 37/40] Added a few assertions --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 2 ++ .../modelchecker/TrivialPomdpValueBoundsModelChecker.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index fd2b54862..e5d233aec 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -473,6 +473,8 @@ namespace storm { std::unique_ptr res(storm::api::verifyWithSparseEngine(exploredMdp, task)); if (res) { values = std::move(res->asExplicitQuantitativeCheckResult().getValueVector()); + STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(lowerValueBounds, values, std::less_equal()), "Computed values are smaller than the lower bound."); + STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(upperValueBounds, values, std::greater_equal()), "Computed values are larger than the upper bound."); } else { STORM_LOG_ASSERT(storm::utility::resources::isTerminate(), "Empty check result!"); STORM_LOG_ERROR("No result obtained while checking."); diff --git a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h index 862a82a05..ca4c2192f 100644 --- a/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h +++ b/src/storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h @@ -88,6 +88,7 @@ namespace storm { pomdpScheduler.setChoice(choiceDistribution, state); } } + STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler."); auto scheduledModel = 
underlyingMdp->applyScheduler(pomdpScheduler, false); auto resultPtr2 = storm::api::verifyWithSparseEngine(scheduledModel, storm::api::createTask(formula.asSharedPointer(), false)); @@ -104,6 +105,7 @@ namespace storm { result.lower = std::move(pomdpSchedulerResult); result.upper = std::move(fullyObservableResult); } + STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower, result.upper, std::less_equal()), "Lower bound is larger than upper bound"); return result; } From 26764137f5cbc3a22181ab4e243d265f1943d03d Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 12:30:39 +0200 Subject: [PATCH 38/40] Fix for --unfold-belief-mdp setting --- src/storm-pomdp-cli/storm-pomdp.cpp | 4 +++- src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 181c5c31f..19e139d1d 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -109,7 +109,9 @@ namespace storm { options.refinementPrecision = storm::utility::convertNumber(gridSettings.getRefinementPrecision()); options.numericPrecision = storm::utility::convertNumber(gridSettings.getNumericPrecision()); options.cacheSubsimplices = gridSettings.isCacheSimplicesSet(); - options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold(); + if (gridSettings.isUnfoldBeliefMdpSizeThresholdSet()) { + options.beliefMdpSizeThreshold = gridSettings.getUnfoldBeliefMdpSizeThreshold(); + } if (storm::NumberTraits::IsExact) { if (gridSettings.isNumericPrecisionSetFromDefault()) { STORM_LOG_WARN_COND(storm::utility::isZero(options.numericPrecision), "Setting numeric precision to zero because exact arithmethic is used."); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 070279ac4..622e63512 100644 --- 
a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -35,7 +35,7 @@ namespace storm { refinementPrecision = storm::utility::convertNumber(1e-4); numericPrecision = storm::NumberTraits::IsExact ? storm::utility::zero() : storm::utility::convertNumber(1e-9); cacheSubsimplices = false; - beliefMdpSizeThreshold = 0ull; + beliefMdpSizeThreshold = boost::none; } template @@ -190,7 +190,7 @@ namespace storm { manager->setRewardModel(rewardModelName); } auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); - if (options.beliefMdpSizeThreshold) { + if (options.beliefMdpSizeThreshold && options.beliefMdpSizeThreshold.get() > 0) { underApproxSizeThreshold = options.beliefMdpSizeThreshold.get(); } if (underApproxSizeThreshold == 0) { From 7ffe322e06e46595cb337a47d5b8b6dd2a4c9434 Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Wed, 8 Apr 2020 12:31:45 +0200 Subject: [PATCH 39/40] SparseModelMemoryProduct: Fixed incorrect computation of state-action rewards under a randomized policy. 
--- src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp b/src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp index 3907f0625..1f30f23fb 100644 --- a/src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp +++ b/src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp @@ -410,7 +410,7 @@ namespace storm { if (isStateReachable(modelState, memoryState)) { if (scheduler && scheduler->getChoice(modelState, memoryState).isDefined()) { ValueType factor = scheduler->getChoice(modelState, memoryState).getChoiceAsDistribution().getProbability(rowOffset); - stateActionRewards.get()[resultTransitionMatrix.getRowGroupIndices()[getResultState(modelState, memoryState)]] = factor * modelStateActionReward; + stateActionRewards.get()[resultTransitionMatrix.getRowGroupIndices()[getResultState(modelState, memoryState)]] += factor * modelStateActionReward; } else { stateActionRewards.get()[resultTransitionMatrix.getRowGroupIndices()[getResultState(modelState, memoryState)] + rowOffset] = modelStateActionReward; } From 5f2a598f48e26bc7c757eaeafd2266abb0cf51f0 Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Tue, 14 Apr 2020 14:29:30 -0700 Subject: [PATCH 40/40] remove unsound 1-state computation --- .../analysis/QualitativeAnalysis.cpp | 76 +------------------ 1 file changed, 2 insertions(+), 74 deletions(-) diff --git a/src/storm-pomdp/analysis/QualitativeAnalysis.cpp b/src/storm-pomdp/analysis/QualitativeAnalysis.cpp index fc27620af..e288ed4ef 100644 --- a/src/storm-pomdp/analysis/QualitativeAnalysis.cpp +++ b/src/storm-pomdp/analysis/QualitativeAnalysis.cpp @@ -69,81 +69,9 @@ namespace storm { storm::storage::BitVector QualitativeAnalysis::analyseProb1Max(storm::logic::UntilFormula const& formula) const { // We consider the states that satisfy the formula with prob.1 under arbitrary schedulers as goal states. 
storm::storage::BitVector goalStates = storm::utility::graph::performProb1A(pomdp.getTransitionMatrix(), pomdp.getTransitionMatrix().getRowGroupIndices(), pomdp.getBackwardTransitions(), checkPropositionalFormula(formula.getLeftSubformula()), checkPropositionalFormula(formula.getRightSubformula())); - + STORM_LOG_TRACE("Prob1A states according to MDP: " << goalStates); // Now find a set of observations such that there is a memoryless scheduler inducing prob. 1 for each state whose observation is in the set. - storm::storage::BitVector candidateStates = goalStates | checkPropositionalFormula(formula.getLeftSubformula()); - storm::storage::BitVector candidateActions = pomdp.getTransitionMatrix().getRowFilter(candidateStates); - storm::storage::BitVector candidateObservations(pomdp.getNrObservations(), true); - - - bool converged = false; - while (!converged) { - converged = true; - - // Get the candidate states that can reach the goal with prob1 via candidate actions - storm::storage::BitVector newCandidates; - if (candidateActions.full()) { - newCandidates = storm::utility::graph::performProb1E(pomdp.getTransitionMatrix(), pomdp.getTransitionMatrix().getRowGroupIndices(), pomdp.getBackwardTransitions(), candidateStates, goalStates); - } else { - storm::storage::SparseMatrix filteredTransitions(pomdp.getTransitionMatrix().filterEntries(candidateActions)); - newCandidates = storm::utility::graph::performProb1E(filteredTransitions, filteredTransitions.getRowGroupIndices(), filteredTransitions.transpose(true), candidateStates, goalStates); - } - if (candidateStates != newCandidates) { - converged = false; - candidateStates = std::move(newCandidates); - } - - // Unselect all observations that have a non-candidate state - for (uint64_t state = candidateStates.getNextUnsetIndex(0); state < candidateStates.size(); state = candidateStates.getNextUnsetIndex(state + 1)) { - candidateObservations.set(pomdp.getObservation(state), false); - } - - // update the candidate actions 
to the set of actions that stay inside the candidate state set - std::vector candidateActionsPerObservation(pomdp.getNrObservations()); - for (auto const& state : candidateStates) { - auto& candidateActionsAtState = candidateActionsPerObservation[pomdp.getObservation(state)]; - if (candidateActionsAtState.size() == 0) { - candidateActionsAtState.resize(pomdp.getNumberOfChoices(state), true); - } - STORM_LOG_ASSERT(candidateActionsAtState.size() == pomdp.getNumberOfChoices(state), "State " + std::to_string(state) + " has " + std::to_string(pomdp.getNumberOfChoices(state)) + " actions, different from other with same observation (" + std::to_string(candidateActionsAtState.size()) + ")." ); - for (auto const& action : candidateActionsAtState) { - for (auto const& entry : pomdp.getTransitionMatrix().getRow(state, action)) { - if (!candidateStates.get(entry.getColumn())) { - candidateActionsAtState.set(action, false); - break; - } - } - } - } - - // Unselect all observations without such an action - for (auto const& o : candidateObservations) { - if (candidateActionsPerObservation[o].empty()) { - candidateObservations.set(o, false); - } - } - - // only keep the candidate states with a candidateObservation - for (auto const& state : candidateStates) { - if (!candidateObservations.get(pomdp.getObservation(state)) && !goalStates.get(state)) { - candidateStates.set(state, false); - converged = false; - } - } - - // Only keep the candidate actions originating from a candidateState. Also transform the representation of candidate actions - candidateActions.clear(); - for (auto const& state : candidateStates) { - uint64_t offset = pomdp.getTransitionMatrix().getRowGroupIndices()[state]; - for (auto const& action : candidateActionsPerObservation[pomdp.getObservation(state)]) { - candidateActions.set(offset + action); - } - } - } - - assert(goalStates.isSubsetOf(candidateStates)); - - return candidateStates; + return goalStates; }