diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 42f1872f7..9c06c4c06 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -456,7 +456,7 @@ namespace storm {
                 STORM_PRINT("Over-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
                 //auto underApprox = weightedSumUnderMap[initialBelief.id];
-                auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize);
+                auto underApproxComponents = computeUnderapproximation(beliefManager, targetObservations, min, computeRewards, maxUaModelSize, lowerPomdpValueBounds, upperPomdpValueBounds);
                 if (storm::utility::resources::isTerminate() && !underApproxComponents) {
                     // TODO: return other components needed for refinement.
                     //return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
@@ -953,91 +953,62 @@ namespace storm {
             std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager, std::set<uint32_t> const &targetObservations, bool min,
-                    bool computeRewards, uint64_t maxModelSize) {
+                    bool computeRewards, uint64_t maxModelSize, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds) {
                 // Build the belief MDP until enough states are explored.
                 //TODO think of other ways to stop exploration besides model size
                 statistics.underApproximationBuildTime.start();
-
-                // Reserve states 0 and 1 as always sink/goal states
-                storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
-                uint64_t extraBottomState = 0;
-                uint64_t extraTargetState = computeRewards ? 0 : 1;
-                uint64_t nextMdpStateId = extraTargetState + 1;
-                uint64_t mdpMatrixRow = 0;
-                for (uint64_t state = 0; state < nextMdpStateId; ++state) {
-                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
-                    mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
-                    ++mdpMatrixRow;
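+                // The explorer builds the belief MDP on the fly; the given POMDP state bounds let it compute lower and upper value bounds for individual beliefs.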
+                storm::builder::BeliefMdpExplorer<storm::models::sparse::Pomdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
+                if (computeRewards) {
+                    explorer.startNewExploration(storm::utility::zero<ValueType>());
+                } else {
+                    explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
                 }
-                std::vector<uint64_t> targetStates = {extraTargetState};
-                storm::storage::BitVector fullyExpandedStates;
-                bsmap_type beliefStateMap;
-                std::deque<uint64_t> beliefsToBeExpanded;
-
-                beliefStateMap.insert(bsmap_type::value_type(beliefManager->getInitialBelief(), nextMdpStateId));
-                beliefsToBeExpanded.push_back(beliefManager->getInitialBelief());
-                ++nextMdpStateId;
-
-                // Expand the believes
-                storm::storage::BitVector foundBeliefs(beliefManager->getNumberOfBeliefIds(), false);
-                for (auto const& belId : beliefsToBeExpanded) {
-                    foundBeliefs.set(belId, true);
+                // Expand the beliefs to generate the grid on-the-fly
+                if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
+                    STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
                 }
-                while (!beliefsToBeExpanded.empty()) {
-                    uint64_t currId = beliefsToBeExpanded.front();
-                    beliefsToBeExpanded.pop_front();
-
-                    uint64_t currMdpState = beliefStateMap.left.at(currId);
-                    auto const& currBelief = beliefManager->getBelief(currId);
-                    uint32_t currObservation = beliefManager->getBeliefObservation(currBelief);
-
-                    mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
+                while (explorer.hasUnexploredState()) {
+                    uint64_t currId = explorer.exploreNextState();
+                    uint32_t currObservation = beliefManager->getBeliefObservation(currId);
                     if (targetObservations.count(currObservation) != 0) {
-                        // Make this state absorbing
-                        targetStates.push_back(currMdpState);
-                        mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
-                        ++mdpMatrixRow;
-                    } else if (currMdpState > maxModelSize) {
-                        if (min) {
-                            // Get an upper bound here
-                            if (computeRewards) {
-                                // TODO: With minimizing rewards we need an upper bound!
-                                // In other cases, this could be helpflull as well.
-                                // For now, add a selfloop to "generate" infinite reward
-                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
-                            } else {
-                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, storm::utility::one<ValueType>());
-                            }
-                        } else {
-                            mdpTransitionsBuilder.addNextValue(mdpMatrixRow, computeRewards ? extraTargetState : extraBottomState, storm::utility::one<ValueType>());
-                        }
-                        ++mdpMatrixRow;
+                        explorer.setCurrentStateIsTarget();
+                        explorer.addSelfloopTransition();
                     } else {
-                        fullyExpandedStates.grow(nextMdpStateId, false);
-                        fullyExpandedStates.set(currMdpState, true);
-                        // Iterate over all actions and add the corresponding transitions
-                        uint64_t someState = currBelief.begin()->first;
-                        uint64_t numChoices = pomdp.getNumberOfChoices(someState);
-                        for (uint64_t action = 0; action < numChoices; ++action) {
-                            auto successorBeliefs = beliefManager->expand(currId, action);
-                            // Check for newly found beliefs
-                            foundBeliefs.grow(beliefManager->getNumberOfBeliefIds(), false);
-                            for (auto const& successor : successorBeliefs) {
-                                auto successorId = successor.first;
-                                if (!foundBeliefs.get(successorId)) {
-                                    foundBeliefs.set(successorId);
-                                    beliefsToBeExpanded.push_back(successorId);
-                                    beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
-                                    ++nextMdpStateId;
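+                        // Truncate the exploration at the current belief if its value bounds are already tight enough or if the maximal model size is reached.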
+                        bool stopExploration = false;
+                        if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
+                            stopExploration = true;
+                            explorer.setCurrentStateIsTruncated();
+                        } else if (explorer.getCurrentNumberOfMdpStates() >= maxModelSize) {
+                            stopExploration = true;
+                            explorer.setCurrentStateIsTruncated();
+                        }
+                        for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
+                            ValueType truncationProbability = storm::utility::zero<ValueType>();
+                            ValueType truncationValueBound = storm::utility::zero<ValueType>();
+                            auto successors = beliefManager->expand(currId, action);
+                            for (auto const& successor : successors) {
+                                bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
+                                if (!added) {
+                                    STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
+                                    // We did not explore this successor state. Get a bound on the "missing" value
+                                    truncationProbability += successor.second;
+                                    truncationValueBound += successor.second * (min ? explorer.computeUpperValueBoundAtBelief(successor.first) : explorer.computeLowerValueBoundAtBelief(successor.first));
                                 }
-                                auto successorMdpState = beliefStateMap.left.at(successorId);
-                                // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorBeliefs are sorted.
-                                mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
                             }
-                            ++mdpMatrixRow;
+                            if (stopExploration) {
+                                if (computeRewards) {
+                                    explorer.addTransitionsToExtraStates(action, truncationProbability);
+                                } else {
+                                    explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
+                                }
+                            }
+                            if (computeRewards) {
+                                // The truncationValueBound will be added on top of the reward introduced by the current belief state.
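+                                // It is zero unless exploration was truncated at this state; then it conservatively bounds the reward missed by not exploring further.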
+                                explorer.computeRewardAtCurrentState(action, truncationValueBound);
+                            }
+                        }
                     }
                     if (storm::utility::resources::isTerminate()) {
@@ -1045,56 +1016,25 @@ namespace storm {
                         break;
                     }
                 }
-                statistics.underApproximationStates = nextMdpStateId;
+                statistics.underApproximationStates = explorer.getCurrentNumberOfMdpStates();
                 if (storm::utility::resources::isTerminate()) {
                     statistics.underApproximationBuildTime.stop();
                     return nullptr;
                 }
-                fullyExpandedStates.resize(nextMdpStateId, false);
-                storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
-                mdpLabeling.addLabel("init");
-                mdpLabeling.addLabel("target");
-                mdpLabeling.addLabelToState("init", beliefStateMap.left.at(beliefManager->getInitialBelief()));
-                for (auto targetState : targetStates) {
-                    mdpLabeling.addLabelToState("target", targetState);
-                }
-                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
-                auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
-                if (computeRewards) {
-                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow, storm::utility::zero<ValueType>()));
-                    for (auto const &iter : beliefStateMap.left) {
-                        if (fullyExpandedStates.get(iter.second)) {
-                            auto const& currentBelief = beliefManager->getBelief(iter.first);
-                            auto representativeState = currentBelief.begin()->first;
-                            for (uint64_t action = 0; action < pomdp.getNumberOfChoices(representativeState); ++action) {
-                                uint64_t mdpChoice = model->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
-                                mdpRewardModel.setStateActionReward(mdpChoice, beliefManager->getBeliefActionReward(iter.first, action));
-                            }
-                        }
-                    }
-                    model->addRewardModel("default", mdpRewardModel);
-                    model->restrictRewardModels(std::set<std::string>({"default"}));
-                }
-
-                model->printModelInformationToStream(std::cout);
+                explorer.finishExploration();
                 statistics.underApproximationBuildTime.stop();
+                STORM_PRINT("Under Approximation MDP build took " << statistics.underApproximationBuildTime << " seconds." << std::endl);
+                explorer.getExploredMdp()->printModelInformationToStream(std::cout);
 
-                auto property = createStandardProperty(min, computeRewards);
-                auto task = createStandardCheckTask(property, std::vector<ValueType>());
                 statistics.underApproximationCheckTime.start();
-                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
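+                // Solve the explored MDP in the requested optimization direction; the explorer stores the computed values.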
+                explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
                 statistics.underApproximationCheckTime.stop();
-                if (storm::utility::resources::isTerminate() && !res) {
-                    return nullptr;
-                }
-                STORM_LOG_ASSERT(res, "Result does not exist.");
-                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(model->getNumberOfStates(), true)));
-                auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
-                auto underApprox = underApproxResultMap[beliefStateMap.left.at(beliefManager->getInitialBelief())];
-                return std::make_unique<UnderApproxComponents<ValueType, RewardModelType>>(UnderApproxComponents<ValueType, RewardModelType>{underApprox, underApproxResultMap, beliefStateMap});
+                STORM_PRINT("Time Underapproximation: " << statistics.underApproximationCheckTime << " seconds." << std::endl);
+                STORM_PRINT("Under-Approximation Result: " << explorer.getComputedValueAtInitialState() << std::endl);
+
+                return std::make_unique<UnderApproxComponents<ValueType, RewardModelType>>(UnderApproxComponents<ValueType, RewardModelType>{explorer.getComputedValueAtInitialState(), {}, {}});
            }
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 6216de097..925bff5b5 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -162,7 +162,7 @@ namespace storm {
                     uint64_t maxModelSize);
             std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(std::shared_ptr<storm::storage::BeliefManager<storm::models::sparse::Pomdp<ValueType>>> beliefManager,
                     std::set<uint32_t> const &targetObservations, bool min, bool computeReward,
-                    uint64_t maxModelSize);
+                    uint64_t maxModelSize, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds);
 
             /**
              * Constructs the initial belief for the given POMDP