From a3e92d2f72952e759effe1cf765f7d7cad87a2a9 Mon Sep 17 00:00:00 2001
From: Tim Quatmann
Date: Mon, 30 Mar 2020 12:18:06 +0200
Subject: [PATCH] Using the new reward functionalities of BeliefGrid.

This also fixes setting rewards in a wrong way (previously, the same reward
was assigned to states with the same observation). A short stand-alone sketch
of the belief-weighted reward computation follows the diff.
---
 .../ApproximatePOMDPModelchecker.cpp          | 71 ++++++++++---------
 1 file changed, 39 insertions(+), 32 deletions(-)

diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index fb5264ca8..3aeb81d18 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -137,7 +137,28 @@ namespace storm {
 
                 stream << "##########################################" << std::endl;
             }
-            
+
+            std::shared_ptr<storm::logic::Formula const> createStandardProperty(bool min, bool computeRewards) {
+                std::string propertyString = computeRewards ? "R" : "P";
+                propertyString += min ? "min" : "max";
+                propertyString += "=? [F \"target\"]";
+                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
+                return storm::api::extractFormulasFromProperties(propertyVector).front();
+            }
+
+            template<typename ValueType>
+            storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const>& property, std::vector<ValueType>&& hintVector) {
+                // Note: The property should not run out of scope after calling this because the task only stores the property by reference.
+                // Therefore, this method needs the property by reference (and not const reference).
+                auto task = storm::api::createTask<ValueType>(property, false);
+                if (!hintVector.empty()) {
+                    auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
+                    hint.setResultHint(std::move(hintVector));
+                    auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
+                    task.setHint(hintPtr);
+                }
+                return task;
+            }
 
             template<typename ValueType, typename RewardModelType>
             std::unique_ptr<POMDPCheckResult<ValueType>>
@@ -360,6 +381,10 @@ namespace storm {
                 }
 
                 storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>> beliefGrid(pomdp, options.numericPrecision);
+                if (computeRewards) {
+                    beliefGrid.setRewardModel();
+                }
+
                 bsmap_type beliefStateMap;
 
                 std::deque<uint64_t> beliefsToBeExpanded;
@@ -520,37 +545,27 @@ namespace storm {
                 storm::storage::sparse::ModelComponents<ValueType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
                 auto overApproxMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
                 if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow));
+                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
                     for (auto const &iter : beliefStateMap.left) {
                         if (fullyExpandedStates.get(iter.second)) {
-                            auto currentBelief = beliefGrid.getGridPoint(iter.first);
+                            auto const& currentBelief = beliefGrid.getGridPoint(iter.first);
                             auto representativeState = currentBelief.begin()->first;
                             for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
-                                // Add the reward
                                 uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                                uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action));
-                                mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief));
+                                mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action));
                             }
                         }
                     }
                     overApproxMdp->addRewardModel("default", mdpRewardModel);
-                    overApproxMdp->restrictRewardModels(std::set<std::string>({"default"}));
                 }
                 statistics.overApproximationBuildTime.stop();
                 STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
                 overApproxMdp->printModelInformationToStream(std::cout);
                 auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(overApproxMdp);
 
-                std::string propertyString = computeRewards ? "R" : "P";
-                propertyString += min ? "min" : "max";
-                propertyString += "=? [F \"target\"]";
-                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
-                std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
-                auto task = storm::api::createTask<ValueType>(property, false);
-                auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
-                hint.setResultHint(hintVector);
-                auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
-                task.setHint(hintPtr);
+                auto property = createStandardProperty(min, computeRewards);
+                auto task = createStandardCheckTask(property, std::move(hintVector));
+
                 statistics.overApproximationCheckTime.start();
                 std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(overApproxMdp, task));
                 statistics.overApproximationCheckTime.stop();
@@ -1172,16 +1187,14 @@ namespace storm {
                 storm::storage::sparse::ModelComponents<ValueType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
                 auto model = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
                 if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow));
+                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
                     for (auto const &iter : beliefStateMap.left) {
                         if (fullyExpandedStates.get(iter.second)) {
-                            auto currentBelief = beliefGrid.getGridPoint(iter.first);
+                            auto const& currentBelief = beliefGrid.getGridPoint(iter.first);
                             auto representativeState = currentBelief.begin()->first;
                             for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
-                                // Add the reward
                                 uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                                uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action));
-                                mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief));
+                                mdpRewardModel.setStateActionReward(mdpChoice, beliefGrid.getBeliefActionReward(currentBelief, action));
                             }
                         }
                     }
@@ -1192,17 +1205,11 @@ namespace storm {
                 model->printModelInformationToStream(std::cout);
                 statistics.underApproximationBuildTime.stop();
 
-                std::string propertyString;
-                if (computeRewards) {
-                    propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
-                } else {
-                    propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
-                }
-                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
-                std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
-
+                auto property = createStandardProperty(min, computeRewards);
+                auto task = createStandardCheckTask(property, std::vector<ValueType>());
+
                 statistics.underApproximationCheckTime.start();
-                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, false)));
+                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
                 statistics.underApproximationCheckTime.stop();
                 if (storm::utility::resources::isTerminate() && !res) {
                     return nullptr;
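
Note on the semantics of the change: beliefGrid.getBeliefActionReward(currentBelief, action)
assigns to each choice of the belief MDP a reward that depends on the specific belief, whereas
the replaced lookup went through the representative state's choice index, which (as the commit
message states) gave every belief with the same observation the same reward. The following
stand-alone sketch is illustrative only (it does not use the storm API; the helper
beliefActionReward and all numbers are made up), but it shows how a belief-weighted expected
reward distinguishes beliefs that a representative-state lookup cannot:

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <vector>

    // Expected immediate reward of one action under a belief: sum over the belief's support
    // of probability times state-action reward (the quantity a getBeliefActionReward-style
    // function is expected to return).
    double beliefActionReward(std::map<uint64_t, double> const& belief, std::vector<double> const& stateActionReward) {
        double result = 0.0;
        for (auto const& entry : belief) {
            result += entry.second * stateActionReward[entry.first];
        }
        return result;
    }

    int main() {
        // Rewards of one fixed action in POMDP states 0 and 1; both states are assumed to
        // emit the same observation.
        std::vector<double> stateActionReward = {0.0, 10.0};

        // Two different beliefs over that observation.
        std::map<uint64_t, double> belief1 = {{0, 0.9}, {1, 0.1}};
        std::map<uint64_t, double> belief2 = {{0, 0.2}, {1, 0.8}};

        // Representative-state lookup: both beliefs get the reward of state 0, i.e. 0 and 0.
        std::cout << stateActionReward[belief1.begin()->first] << " vs "
                  << stateActionReward[belief2.begin()->first] << '\n';

        // Belief-weighted expectation: 0.9*0 + 0.1*10 = 1 vs 0.2*0 + 0.8*10 = 8.
        std::cout << beliefActionReward(belief1, stateActionReward) << " vs "
                  << beliefActionReward(belief2, stateActionReward) << '\n';
        return 0;
    }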