diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 146d1ff3e..4976f3404 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -23,7 +23,6 @@
 #include "storm/api/export.h"
 #include "storm-parsers/api/storm-parsers.h"
 
-#include "storm-pomdp/storage/BeliefGrid.h"
 #include "storm/utility/macros.h"
 #include "storm/utility/SignalHandler.h"
@@ -477,7 +476,7 @@ namespace storm {
                         std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
 
                         for (uint64_t action = 0; action < numChoices; ++action) {
-                            auto successorGridPoints = newBeliefGrid.expandAction(currId, action, observationResolutionVector);
+                            auto successorGridPoints = beliefGrid.expandAndTriangulate(currId, action, observationResolutionVector);
                             // Check for newly found grid points
                             foundBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false);
                             for (auto const& successor : successorGridPoints) {
@@ -1072,6 +1071,158 @@ namespace storm {
                return std::make_unique<UnderApproxComponents<ValueType, RewardModelType>>(UnderApproxComponents<ValueType, RewardModelType>{underApprox, underApproxResultMap, beliefStateMap});
            }
 
+            template<typename ValueType, typename RewardModelType>
+            std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
+            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>>& beliefGrid,
+                                                                                                std::set<uint32_t> const &targetObservations, bool min,
+                                                                                                bool computeRewards, uint64_t maxModelSize) {
+                // Build the belief MDP until enough states are explored.
+                //TODO think of other ways to stop exploration besides model size
+
+                statistics.underApproximationBuildTime.start();
+
+                // Reserve states 0 and 1 as sink/goal states
+                storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
+                uint64_t extraBottomState = 0;
+                uint64_t extraTargetState = computeRewards ? 0 : 1;
+                uint64_t nextMdpStateId = extraTargetState + 1;
+                uint64_t mdpMatrixRow = 0;
+                for (uint64_t state = 0; state < nextMdpStateId; ++state) {
+                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
+                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
+                    ++mdpMatrixRow;
+                }
+                std::vector<uint64_t> targetStates = {extraTargetState};
+
+                bsmap_type beliefStateMap;
+                std::deque<uint64_t> beliefsToBeExpanded;
+
+                auto initialBeliefId = beliefGrid.getInitialBelief();
+                beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, nextMdpStateId));
+                beliefsToBeExpanded.push_back(initialBeliefId);
+                ++nextMdpStateId;
+
+                // Expand the beliefs
+                storm::storage::BitVector foundBeliefs(beliefGrid.getNumberOfGridPointIds(), false);
+                for (auto const& belId : beliefsToBeExpanded) {
+                    foundBeliefs.set(belId, true);
+                }
+                while (!beliefsToBeExpanded.empty()) {
+                    uint64_t currId = beliefsToBeExpanded.front();
+                    beliefsToBeExpanded.pop_front();
+
+                    uint64_t currMdpState = beliefStateMap.left.at(currId);
+                    auto const& currBelief = beliefGrid.getGridPoint(currId);
+                    uint32_t currObservation = beliefGrid.getBeliefObservation(currBelief);
+
+                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
+
+                    if (targetObservations.count(currObservation) != 0) {
+                        // Make this state absorbing
+                        targetStates.push_back(currMdpState);
+                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
+                        ++mdpMatrixRow;
+                    } else if (currMdpState > maxModelSize) {
+                        // In other cases, this could be helpful as well.
+                        if (min) {
+                            // Get an upper bound here
+                            if (computeRewards) {
+                                // TODO: With minimizing rewards we need an upper bound!
+                                // For now, add a selfloop to "generate" infinite reward
+                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
+                            } else {
+                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, storm::utility::one<ValueType>());
+                            }
+                        } else {
+                            mdpTransitionsBuilder.addNextValue(mdpMatrixRow, computeRewards ? extraTargetState : extraBottomState, storm::utility::one<ValueType>());
+                        }
+                        ++mdpMatrixRow;
+                    } else {
+                        // Iterate over all actions and add the corresponding transitions
+                        uint64_t someState = currBelief.begin()->first;
+                        uint64_t numChoices = pomdp.getNumberOfChoices(someState);
+                        for (uint64_t action = 0; action < numChoices; ++action) {
+                            auto successorBeliefs = beliefGrid.expand(currId, action);
+                            // Check for newly found beliefs
+                            foundBeliefs.grow(beliefGrid.getNumberOfGridPointIds(), false);
+                            for (auto const& successor : successorBeliefs) {
+                                auto successorId = successor.first;
+                                if (!foundBeliefs.get(successorId)) {
+                                    foundBeliefs.set(successorId);
+                                    beliefsToBeExpanded.push_back(successorId);
+                                    beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
+                                    ++nextMdpStateId;
+                                }
+                                auto successorMdpState = beliefStateMap.left.at(successorId);
+                                // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted.
+                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
+                            }
+                            ++mdpMatrixRow;
+                        }
+                    }
+                    if (storm::utility::resources::isTerminate()) {
+                        statistics.underApproximationBuildAborted = true;
+                        break;
+                    }
+                }
+                statistics.underApproximationStates = nextMdpStateId;
+                if (storm::utility::resources::isTerminate()) {
+                    statistics.underApproximationBuildTime.stop();
+                    return nullptr;
+                }
+
+                storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
+                mdpLabeling.addLabel("init");
+                mdpLabeling.addLabel("target");
+                mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBeliefId));
+                for (auto targetState : targetStates) {
+                    mdpLabeling.addLabelToState("target", targetState);
+                }
+
+                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
+                auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
+                if (computeRewards) {
+                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow));
+                    for (auto const &iter : beliefStateMap.left) {
+                        auto currentBelief = beliefGrid.getGridPoint(iter.first);
+                        auto representativeState = currentBelief.begin()->first;
+                        for (uint64_t action = 0; action < model->getNumberOfChoices(representativeState); ++action) {
+                            // Add the reward
+                            uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
+                            uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action));
+                            mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief));
+                        }
+                    }
+                    model->addRewardModel("default", mdpRewardModel);
+                    model->restrictRewardModels(std::set<std::string>({"default"}));
+                }
+
+                model->printModelInformationToStream(std::cout);
+                statistics.underApproximationBuildTime.stop();
+
+                std::string propertyString;
+                if (computeRewards) {
+                    propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
+                } else {
+                    propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
+                }
+                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
+                std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
+
+                statistics.underApproximationCheckTime.start();
+                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, false)));
+                statistics.underApproximationCheckTime.stop();
+                if (storm::utility::resources::isTerminate() && !res) {
+                    return nullptr;
+                }
+                STORM_LOG_ASSERT(res, "Result does not exist.");
+                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true)));
+                auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
+                auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)];
+
+                return std::make_unique<UnderApproxComponents<ValueType, RewardModelType>>(UnderApproxComponents<ValueType, RewardModelType>{underApprox, underApproxResultMap, beliefStateMap});
+            }
+
             template<typename ValueType, typename RewardModelType>
             storm::storage::SparseMatrix<ValueType>
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 4820ea380..32daa0876 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -3,6 +3,7 @@
 #include "storm/models/sparse/Pomdp.h"
 #include "storm/utility/logging.h"
 #include "storm-pomdp/storage/Belief.h"
+#include "storm-pomdp/storage/BeliefGrid.h"
 #include <boost/bimap.hpp>
 
 #include "storm/storage/jani/Property.h"
@@ -162,6 +163,9 @@ namespace storm {
                                                          std::set<uint32_t> const &targetObservations, uint64_t initialBeliefId, bool min, bool computeReward, uint64_t maxModelSize);
+            std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>>& beliefGrid,
+                                                                                                          std::set<uint32_t> const &targetObservations, bool min, bool computeReward,
+                                                                                                          uint64_t maxModelSize);
 
             /**
              * Constructs the initial belief for the given POMDP
diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefGrid.h
index a75872ee4..48da1b85b 100644
--- a/src/storm-pomdp/storage/BeliefGrid.h
+++ b/src/storm-pomdp/storage/BeliefGrid.h
@@ -10,7 +10,8 @@
 namespace storm {
     namespace storage {
 
-        template
+        template
+        // TODO: Change name. This actually does not only manage grid points.
         class BeliefGrid {
 
        public:
@@ -137,6 +138,28 @@
                return true;
            }
 
+            BeliefId getInitialBelief() {
+                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
+                                 "POMDP contains more than one initial state");
+                STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
+                                 "POMDP does not contain an initial state");
+                BeliefType belief;
+                belief[*pomdp.getInitialStates().begin()] = storm::utility::one<BeliefValueType>();
+
+                STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief.");
+                return getOrAddGridPointId(belief);
+            }
+
+            uint32_t getBeliefObservation(BeliefType belief) {
+                STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief.");
+                return pomdp.getObservation(belief.begin()->first);
+            }
+
+            uint32_t getBeliefObservation(BeliefId beliefId) {
+                return getBeliefObservation(getGridPoint(beliefId));
+            }
+
            Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
                //TODO this can also be simplified using the sparse vector interpretation
                //TODO Enable caching for this method?
@@ -229,9 +252,9 @@
                return gridPoints.size();
            }
 
-            std::map<BeliefId, ValueType> expandAction(BeliefId const& gridPointId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
-
+            std::map<BeliefId, ValueType> expandInternal(BeliefId const& gridPointId, uint64_t actionIndex, boost::optional<std::vector<uint64_t>> const& observationTriangulationResolutions = boost::none) {
                std::map<BeliefId, ValueType> destinations; // The belief ids should be ordered
+                // TODO: Does this make sense? It could be better to order them afterwards because now we rely on the fact that MDP states have the same order as their associated BeliefIds
 
                BeliefType gridPoint = getGridPoint(gridPointId);
 
@@ -247,7 +270,7 @@
                    }
                }
 
-                // Now for each successor observation we find and triangulate the successor belief
+                // Now for each successor observation we find and potentially triangulate the successor belief
                for (auto const& successor : successorObs) {
                    BeliefType successorBelief;
                    for (auto const& pointEntry : gridPoint) {
@@ -261,9 +284,13 @@
                    }
                    STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief.");
 
-                    Triangulation triangulation = triangulateBelief(successorBelief, observationResolutions[successor.first]);
-                    for (size_t j = 0; j < triangulation.size(); ++j) {
-                        addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
+                    if (observationTriangulationResolutions) {
+                        Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.get()[successor.first]);
+                        for (size_t j = 0; j < triangulation.size(); ++j) {
+                            addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
+                        }
+                    } else {
+                        addToDistribution(destinations, getOrAddGridPointId(successorBelief), successor.second);
                    }
                }
 
@@ -271,6 +298,14 @@
            }
 
+            std::map<BeliefId, ValueType> expandAndTriangulate(BeliefId const& gridPointId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
+                return expandInternal(gridPointId, actionIndex, observationResolutions);
+            }
+
+            std::map<BeliefId, ValueType> expand(BeliefId const& gridPointId, uint64_t actionIndex) {
+                return expandInternal(gridPointId, actionIndex);
+            }
+
        private:
 
            BeliefId getOrAddGridPointId(BeliefType const& gridPoint) {
@@ -284,7 +319,6 @@
            }
 
            PomdpType const& pomdp;
-            uint64_t resolution;
            std::vector<BeliefType> gridPoints;
            std::map<BeliefType, BeliefId> gridPointToIdMap;
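
Note (illustration only, not part of the patch): the BeliefGrid refactoring above routes both expansion variants through a single expandInternal whose triangulation resolutions are an optional argument. expand() keeps the successor beliefs exact, as used by the new computeUnderapproximation, while expandAndTriangulate() snaps the successors onto grid points, as used by the over-approximation. The following is a minimal, self-contained toy sketch of that pattern. Every name in it (ToyBeliefGrid, getOrAddId, the two-state belief encoded as a single double, a single resolution instead of a per-observation vector, std::optional instead of boost::optional) is invented for the sketch and is not Storm's API.

// Toy sketch: beliefs over two hidden states, so a belief is just p = P(state 1).
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <map>
#include <optional>
#include <vector>

using BeliefId = uint64_t;
using Belief = double;                            // P(state 1); P(state 0) = 1 - p
using Distribution = std::map<BeliefId, double>;  // successor belief id -> probability

class ToyBeliefGrid {
public:
    BeliefId getOrAddId(Belief b) {
        auto it = idOf.find(b);
        if (it != idOf.end()) return it->second;
        BeliefId id = beliefs.size();
        beliefs.push_back(b);
        idOf[b] = id;
        return id;
    }
    Belief getBelief(BeliefId id) const { return beliefs[id]; }

    // Exact successors (used for the under-approximation in the patch).
    Distribution expand(BeliefId id, uint64_t action) {
        return expandInternal(id, action, std::nullopt);
    }
    // Successors snapped onto the grid {0, 1/res, ..., 1} (over-approximation use case).
    Distribution expandAndTriangulate(BeliefId id, uint64_t action, uint64_t resolution) {
        return expandInternal(id, action, resolution);
    }

private:
    Distribution expandInternal(BeliefId id, uint64_t action, std::optional<uint64_t> resolution) {
        Belief b = getBelief(id);
        // Toy belief update: with probability 1/2 the action shifts the belief
        // (up for action 0, down otherwise), with probability 1/2 it stays put.
        std::vector<std::pair<Belief, double>> successors = {
            {action == 0 ? std::min(1.0, b + 0.3) : std::max(0.0, b - 0.3), 0.5},
            {b, 0.5}};
        Distribution destinations;
        for (auto const& [succ, prob] : successors) {
            if (resolution) {
                // 1D "triangulation": distribute succ onto the two neighbouring grid points.
                double scaled = succ * static_cast<double>(*resolution);
                double lower = std::floor(scaled), upper = std::ceil(scaled);
                double upperWeight = (lower == upper) ? 0.0 : scaled - lower;
                destinations[getOrAddId(lower / *resolution)] += prob * (1.0 - upperWeight);
                if (upperWeight > 0.0) destinations[getOrAddId(upper / *resolution)] += prob * upperWeight;
            } else {
                // Without a resolution, keep the successor belief as-is.
                destinations[getOrAddId(succ)] += prob;
            }
        }
        return destinations;
    }

    std::vector<Belief> beliefs;
    std::map<Belief, BeliefId> idOf;
};

int main() {
    ToyBeliefGrid grid;
    BeliefId init = grid.getOrAddId(0.5);
    for (auto const& [succ, prob] : grid.expandAndTriangulate(init, 0, 4)) {
        std::cout << "grid point p=" << grid.getBelief(succ) << " with probability " << prob << "\n";
    }
}

The default-constructed optional mirrors the boost::optional default argument in expandInternal above; swapping the two wrappers is all a caller has to do to move between the triangulated (grid-based) and the exact belief expansion.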