From c2ddea14806cfabc81e1b956ced220129e6f463b Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 3 Apr 2020 12:41:55 +0200 Subject: [PATCH] First (re-) implementation of refinement. (probably needs some testing/debugging) --- src/storm-pomdp/builder/BeliefMdpExplorer.h | 60 +- .../ApproximatePOMDPModelchecker.cpp | 712 +++++------------- .../ApproximatePOMDPModelchecker.h | 141 +--- 3 files changed, 267 insertions(+), 646 deletions(-) diff --git a/src/storm-pomdp/builder/BeliefMdpExplorer.h b/src/storm-pomdp/builder/BeliefMdpExplorer.h index 426eff188..2a97c5e05 100644 --- a/src/storm-pomdp/builder/BeliefMdpExplorer.h +++ b/src/storm-pomdp/builder/BeliefMdpExplorer.h @@ -43,6 +43,10 @@ namespace storm { } BeliefMdpExplorer(BeliefMdpExplorer&& other) = default; + BeliefManagerType const& getBeliefManager() const { + return *beliefManager; + } + void startNewExploration(boost::optional extraTargetStateValue = boost::none, boost::optional extraBottomStateValue = boost::none) { status = Status::Exploring; // Reset data from potential previous explorations @@ -101,7 +105,7 @@ namespace storm { exploredBeliefIds.clear(); exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false); exploredMdpTransitions.clear(); - exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices); + exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices()); exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices(); mdpActionRewards.clear(); if (exploredMdp->hasRewardModel()) { @@ -235,7 +239,7 @@ namespace storm { for (auto const& transition : exploredMdp->getTransitionMatrix().getRow(choiceIndex)) { internalAddTransition(choiceIndex, transition.getColumn(), transition.getValue()); // Check whether exploration is needed - auto beliefId = mdpStateToBeliefIdMap[transition.getColumn()]; + auto beliefId = getBeliefId(transition.getColumn()); if (beliefId != beliefManager->noId()) { // Not the extra target or bottom state if (!exploredBeliefIds.get(beliefId)) { // This belief needs exploration @@ -397,6 +401,10 @@ namespace storm { status = Status::ModelChecked; } + bool hasComputedValues() const { + return status == Status::ModelChecked; + } + std::vector const& getValuesOfExploredMdp() const { STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status."); return values; @@ -408,6 +416,51 @@ namespace storm { return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)]; } + MdpStateType getBeliefId(MdpStateType exploredMdpState) const { + STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status."); + return mdpStateToBeliefIdMap[exploredMdpState]; + } + + struct SuccessorObservationInformation { + SuccessorObservationInformation(ValueType const& obsProb, ValueType const& maxProb, uint64_t const& count) : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) { + // Intentionally left empty. + } + + void join(SuccessorObservationInformation other) { + observationProbability += other.observationProbability; + maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs); + successorWithObsCount += other.successorWithObsCount; + } + + ValueType observationProbability; /// The probability we move to the corresponding observation. + ValueType maxProbabilityToSuccessorWithObs; /// The maximal probability to move to a successor with the corresponding observation. 
+ uint64_t successorWithObsCount; /// The number of successors with this observation + }; + + void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map& gatheredSuccessorObservations) { + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior"); + uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex; + gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + } + + void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map& gatheredSuccessorObservations) { + STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before"); + for (auto const& entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) { + auto const& beliefId = getBeliefId(entry.getColumn()); + if (beliefId != beliefManager->noId()) { + auto const& obs = beliefManager->getBeliefObservation(beliefId); + SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1); + auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info); + if (!obsInsertion.second) { + // There already is an entry for this observation, so join the two pieces of information + obsInsertion.first->second.join(info); + } + } + } + } + + private: MdpStateType noState() const { return std::numeric_limits::max(); } @@ -438,7 +491,8 @@ namespace storm { } MdpStateType getCurrentBeliefId() const { - return mdpStateToBeliefIdMap[getCurrentMdpState()]; + STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status."); + return getBeliefId(getCurrentMdpState()); } void internalAddTransition(uint64_t const& row, MdpStateType const& column, ValueType const& value) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 436fc3e09..4526607d4 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -171,8 +171,9 @@ namespace storm { if (rewardModelName) { manager->setRewardModel(rewardModelName); } - auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager); - if (approx) { + auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, manager, approx); + if (approx->hasComputedValues()) { STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n"); approx->getExploredMdp()->printModelInformationToStream(std::cout); ValueType& resultValue = min ?
result.lowerBound : result.upperBound; @@ -185,8 +186,9 @@ namespace storm { if (rewardModelName) { manager->setRewardModel(rewardModelName); } - auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager); - if (approx) { + auto approx = std::make_shared(manager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, manager, approx); + if (approx->hasComputedValues()) { STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); approx->getExploredMdp()->printModelInformationToStream(std::cout); ValueType& resultValue = min ? result.upperBound : result.lowerBound; @@ -200,23 +202,27 @@ namespace storm { // Set up exploration data std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); - auto beliefManager = std::make_shared(pomdp, options.numericPrecision); + auto overApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); + auto underApproxBeliefManager = std::make_shared(pomdp, options.numericPrecision); if (rewardModelName) { - beliefManager->setRewardModel(rewardModelName); + overApproxBeliefManager->setRewardModel(rewardModelName); + underApproxBeliefManager->setRewardModel(rewardModelName); } // OverApproximaion - auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager); - if (!overApproximation) { + auto overApproximation = std::make_shared(overApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), false, nullptr, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (!overApproximation->hasComputedValues()) { return; } ValueType& overApproxValue = min ? result.lowerBound : result.upperBound; overApproxValue = overApproximation->getComputedValueAtInitialState(); - // UnderApproximation TODO: use same belief manager?) - uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates(); - auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager); - if (!underApproximation) { + // UnderApproximation + uint64_t underApproxSizeThreshold = std::max(overApproximation->getExploredMdp()->getNumberOfStates(), 10); + auto underApproximation = std::make_shared(underApproxBeliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); + if (!underApproximation->hasComputedValues()) { return; } ValueType& underApproxValue = min ? 
result.upperBound : result.lowerBound; @@ -225,379 +231,165 @@ namespace storm { // ValueType lastMinScore = storm::utility::infinity(); // Start refinement statistics.refinementSteps = 0; + ValueType refinementAggressiveness = storm::utility::zero(); while (result.diff() > options.refinementPrecision) { if (storm::utility::resources::isTerminate()) { break; } - // TODO the actual refinement - /* - // choose which observation(s) to refine - std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); - std::vector beliefCount(pomdp.getNrObservations(), 0); - bsmap_type::right_map::const_iterator underApproxStateBeliefIter = res->underApproxBeliefStateMap.right.begin(); - while (underApproxStateBeliefIter != res->underApproxBeliefStateMap.right.end()) { - auto currentBelief = res->beliefList[underApproxStateBeliefIter->second]; - beliefCount[currentBelief.observation] += 1; - bsmap_type::left_const_iterator overApproxBeliefStateIter = res->overApproxBeliefStateMap.left.find(underApproxStateBeliefIter->second); - if (overApproxBeliefStateIter != res->overApproxBeliefStateMap.left.end()) { - // If there is an over-approximate value for the belief, use it - auto diff = res->overApproxMap[overApproxBeliefStateIter->second] - res->underApproxMap[underApproxStateBeliefIter->first]; - obsAccumulator[currentBelief.observation] += diff; - } else { - //otherwise, we approximate a value TODO this is critical, we have to think about it - auto overApproxValue = storm::utility::zero(); - auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation], pomdp.getNumberOfStates()); - auto subSimplex = temp.first; - auto lambdas = temp.second; - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - uint64_t approxId = getBeliefIdInVector(res->beliefList, currentBelief.observation, subSimplex[j]); - bsmap_type::left_const_iterator approxIter = res->overApproxBeliefStateMap.left.find(approxId); - if (approxIter != res->overApproxBeliefStateMap.left.end()) { - overApproxValue += lambdas[j] * res->overApproxMap[approxIter->second]; - } else { - overApproxValue += lambdas[j]; - } - } - } - obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[underApproxStateBeliefIter->first]; - } - ++underApproxStateBeliefIter; - } - - - //for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { - // obsAccumulator[i] /= storm::utility::convertNumber(beliefCount[i]); - //} - changedObservations.clear(); - - //TODO think about some other scoring methods - auto maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end()); - //if (cc.isEqual(maxAvgDifference, lastMinScore) || cc.isLess(lastMinScore, maxAvgDifference)) { - lastMinScore = maxAvgDifference; - auto maxRes = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); - STORM_PRINT("Set all to " << maxRes + 1 << std::endl) - for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { - observationResolutionVector[i] = maxRes + 1; - changedObservations.insert(i); + + // Refine over-approximation + refinementAggressiveness *= storm::utility::convertNumber(1.1);; + buildOverApproximation(targetObservations, min, rewardModelName.is_initialized(), true, &refinementAggressiveness, observationResolutionVector, overApproxBeliefManager, overApproximation); + if (overApproximation->hasComputedValues()) { + overApproxValue = overApproximation->getComputedValueAtInitialState(); 
+ } else { + break; + } - //} else { - // lastMinScore = std::min(maxAvgDifference, lastMinScore); - // STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) - // STORM_PRINT("Last Min Score: " << lastMinScore << std::endl) - // //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl) - // for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { - //STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i]) - // if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) { - //STORM_PRINT(" *** ") - // observationResolutionVector[i] += 1; - // changedObservations.insert(i); - // } - //STORM_PRINT(std::endl) - // } - //} - if (underApproxModelSize < std::numeric_limits::max() - 101) { - underApproxModelSize += 100; + + // Refine under-approximation + underApproxSizeThreshold = storm::utility::convertNumber(storm::utility::convertNumber(underApproxSizeThreshold) * (storm::utility::one() + refinementAggressiveness)); + underApproxSizeThreshold = std::max(underApproxSizeThreshold, overApproximation->getExploredMdp()->getNumberOfStates()); + buildUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), underApproxSizeThreshold, underApproxBeliefManager, underApproximation); + if (underApproximation->hasComputedValues()) { + underApproxValue = underApproximation->getComputedValueAtInitialState(); + } else { + break; } - STORM_PRINT( - "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl) - res = computeRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, - res, changedObservations, initialOverApproxMap, initialUnderApproxMap, underApproxModelSize); - //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot"); - STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) || - cc.isEqual(res->underApproxValue, res->overApproxValue), - "The value for the under-approximation is larger than the value for the over-approximation."); - */ ++statistics.refinementSteps.get(); } } + /*! + * Heuristically rates the quality of the approximation described by the given successor observation info. + * Here, 0 means a bad approximation and 1 means a good approximation. + */ + template + typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::rateObservation(typename ExplorerType::SuccessorObservationInformation const& info) { + auto n = storm::utility::convertNumber(info.successorWithObsCount); + auto one = storm::utility::one(); + + // Create the actual rating for this observation at this choice from the given info + ValueType obsChoiceRating = info.maxProbabilityToSuccessorWithObs / info.observationProbability; + // At this point, obsChoiceRating is the largest triangulation weight (which ranges from 1/n to 1) + // Normalize the rating so that it ranges from 0 to 1, where + // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely approximated.
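+ // For illustration (hypothetical numbers): if an action reaches successors with this observation with total probability 0.8,
+ // spread over n = 4 grid points whose largest single entry is 0.5, then obsChoiceRating = 0.5 / 0.8 = 0.625,
+ // which the normalization below maps to (0.625 * 4 - 1) / (4 - 1) = 0.5.
+ // A uniform spread (largest entry 0.2, i.e. weight 1/4) would be mapped to 0, the worst rating.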
+ obsChoiceRating = (obsChoiceRating * n - one) / (n - one); + return obsChoiceRating; + } + + template + std::vector::ValueType> ApproximatePOMDPModelchecker::getObservationRatings(std::shared_ptr const& overApproximation) { + uint64_t numMdpChoices = overApproximation->getExploredMdp()->getNumberOfChoices(); + + std::vector resultingRatings(pomdp.getNrObservations(), storm::utility::one()); + + std::map gatheredSuccessorObservations; // Declare here to avoid reallocations + for (uint64_t mdpChoice = 0; mdpChoice < numMdpChoices; ++mdpChoice) { + gatheredSuccessorObservations.clear(); + overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations); + for (auto const& obsInfo : gatheredSuccessorObservations) { + auto const& obs = obsInfo.first; + ValueType obsChoiceRating = rateObservation(obsInfo.second); + + // The rating of the observation will be the minimum over all choice-based observation ratings + resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating); + } + } + return resultingRatings; + } + template - std::shared_ptr::ExplorerType> ApproximatePOMDPModelchecker::computeOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, std::vector& observationResolutionVector, std::shared_ptr& beliefManager) { + void ApproximatePOMDPModelchecker::buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { + STORM_LOG_ASSERT(!refine || refinementAggressiveness != nullptr, "Refinement enabled but no aggressiveness given"); + STORM_LOG_ASSERT(!refine || *refinementAggressiveness >= storm::utility::zero(), "Can not refine with negative aggressiveness."); + STORM_LOG_ASSERT(!refine || *refinementAggressiveness <= storm::utility::one(), "Refinement with aggressiveness > 1 is invalid."); + statistics.overApproximationBuildTime.start(); - storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); - if (computeRewards) { - explorer.startNewExploration(storm::utility::zero()); + storm::storage::BitVector refinedObservations; + if (!refine) { + // If we build the model from scratch, we first have to setup the explorer for the overApproximation. + if (computeRewards) { + overApproximation->startNewExploration(storm::utility::zero()); + } else { + overApproximation->startNewExploration(storm::utility::one(), storm::utility::zero()); + } } else { - explorer.startNewExploration(storm::utility::one(), storm::utility::zero()); + // If we refine the existing overApproximation, we need to find out which observation resolutions need refinement. + auto obsRatings = getObservationRatings(overApproximation); + ValueType minRating = *std::min_element(obsRatings.begin(), obsRatings.end()); + // Potentially increase the aggressiveness so that at least one observation actually gets refinement. 
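+ // Hypothetical example for the steps below: if the smallest rating is 0.2 and the aggressiveness is 0.3,
+ // the aggressiveness stays at 0.3, every observation rated at most 0.3 is selected for refinement, and an
+ // observation with current resolution 12 is incremented by ceil(1 + 0.3 * 12) = 5, i.e. refined to resolution 17.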
+ *refinementAggressiveness = std::max(minRating, *refinementAggressiveness); + refinedObservations = storm::utility::vector::filter(obsRatings, [&refinementAggressiveness](ValueType const& r) { return r <= *refinementAggressiveness;}); + STORM_PRINT("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations."); + for (auto const& obs : refinedObservations) { + // Heuristically increment the resolution at the refined observations (also based on the refinementAggressiveness) + ValueType incrementValue = storm::utility::one() + (*refinementAggressiveness) * storm::utility::convertNumber(observationResolutionVector[obs]); + observationResolutionVector[obs] += storm::utility::convertNumber(storm::utility::ceil(incrementValue)); + } + overApproximation->restartExploration(); } - // Expand the beliefs to generate the grid on-the-fly - while (explorer.hasUnexploredState()) { - uint64_t currId = explorer.exploreNextState(); + // Start exploration + std::map gatheredSuccessorObservations; // Declare here to avoid reallocations + while (overApproximation->hasUnexploredState()) { + uint64_t currId = overApproximation->exploreNextState(); uint32_t currObservation = beliefManager->getBeliefObservation(currId); if (targetObservations.count(currObservation) != 0) { - explorer.setCurrentStateIsTarget(); - explorer.addSelfloopTransition(); + overApproximation->setCurrentStateIsTarget(); + overApproximation->addSelfloopTransition(); } else { bool stopExploration = false; - if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + if (storm::utility::abs(overApproximation->getUpperValueBoundAtCurrentState() - overApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { stopExploration = true; - explorer.setCurrentStateIsTruncated(); + overApproximation->setCurrentStateIsTruncated(); } for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { - ValueType truncationProbability = storm::utility::zero(); - ValueType truncationValueBound = storm::utility::zero(); - auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); - for (auto const& successor : successorGridPoints) { - bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration); - if (!added) { - STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); - // We did not explore this successor state. Get a bound on the "missing" value - truncationProbability += successor.second; - truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first)); - } - } - if (stopExploration) { - if (computeRewards) { - explorer.addTransitionsToExtraStates(action, truncationProbability); - } else { - explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); - } - } - if (computeRewards) { - // The truncationValueBound will be added on top of the reward introduced by the current belief state. 
- explorer.computeRewardAtCurrentState(action, truncationValueBound); - } - } - } - if (storm::utility::resources::isTerminate()) { - statistics.overApproximationBuildAborted = true; - break; - } - } - statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates(); - if (storm::utility::resources::isTerminate()) { - statistics.overApproximationBuildTime.stop(); - return nullptr; - } - - explorer.finishExploration(); - statistics.overApproximationBuildTime.stop(); - - statistics.overApproximationCheckTime.start(); - explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); - statistics.overApproximationCheckTime.stop(); - - return std::make_shared(std::move(explorer)); - } - - template - void ApproximatePOMDPModelchecker::refineOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation) { - /*TODO: - template - std::shared_ptr> - ApproximatePOMDPModelchecker::computeRefinementStep(std::set const &targetObservations, bool min, - std::vector &observationResolutionVector, - bool computeRewards, - std::shared_ptr> refinementComponents, - std::set changedObservations, - boost::optional> overApproximationMap, - boost::optional> underApproximationMap, - uint64_t maxUaModelSize) { - bool initialBoundMapsSet = overApproximationMap && underApproximationMap; - std::map initialOverMap; - std::map initialUnderMap; - if (initialBoundMapsSet) { - initialOverMap = overApproximationMap.value(); - initialUnderMap = underApproximationMap.value(); - } - // Note that a persistent cache is not support by the current data structure. The resolution for the given belief also has to be stored somewhere to cache effectively - std::map>> subSimplexCache; - std::map> lambdaCache; - - // Map to save the weighted values resulting from the initial preprocessing for newly added beliefs / indices in beliefSpace - std::map weightedSumOverMap; - std::map weightedSumUnderMap; - - statistics.overApproximationBuildTime.start(); - - uint64_t nextBeliefId = refinementComponents->beliefList.size(); - uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates(); - std::set relevantStates; // The MDP states where the observation has changed - for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { - auto currentBelief = refinementComponents->beliefList[iter.first]; - if (changedObservations.find(currentBelief.observation) != changedObservations.end()) { - relevantStates.insert(iter.second); - } - } - - std::set> statesAndActionsToCheck; // The predecessors of states where the observation has changed - for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) { - for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) { - for (typename storm::storage::SparseMatrix::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow( - state, row).begin(); - itEntry != refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(state, row).end(); ++itEntry) { - if (relevantStates.find(itEntry->getColumn()) != relevantStates.end()) { - statesAndActionsToCheck.insert(std::make_pair(state, row)); - break; - } - } - } - } - - std::deque beliefsToBeExpanded; - - std::map, std::map> transitionsStateActionPair; - 
for (auto const &stateActionPair : statesAndActionsToCheck) { - auto currId = refinementComponents->overApproxBeliefStateMap.right.at(stateActionPair.first); - auto action = stateActionPair.second; - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], - action); - std::map transitionInActionBelief; - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - // Expand and triangulate the successor - uint32_t observation = iter->first; - uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, - targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); - nextBeliefId = refinementComponents->beliefList.size(); - //Triangulate here and put the possibly resulting belief in the grid - std::vector> subSimplex; - std::vector lambdas; - //TODO add caching - if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, - observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], - pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - if (options.cacheSubsimplices) { - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; - } - } - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]); - if (approxId == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextBeliefId, observation, subSimplex[j]}; - refinementComponents->beliefList.push_back(gridBelief); - refinementComponents->beliefGrid.push_back(gridBelief); - refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - // compute overapproximate value using MDP result map - if (initialBoundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(initialOverMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(initialUnderMap[i]); - } - weightedSumOverMap[nextBeliefId] = tempWeightedSumOver; - weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder; + // Check whether we expand this state/action pair + // We always expand if we are not doing refinement or if the state was not available in the "old" MDP. + // Otherwise, a heuristic decides.
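+ // Roughly: the pair is rated by the smallest rating among its successor observations that were selected
+ // for refinement, and it is only re-expanded (with the increased resolutions) if this rating is below twice
+ // the current refinementAggressiveness; otherwise its previously explored transitions are restored.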
+ bool expandStateAction = true; + if (refine && overApproximation->currentStateHasOldBehavior()) { + // Compute a rating of the current state/action pair + ValueType stateActionRating = storm::utility::one(); + gatheredSuccessorObservations.clear(); + overApproximation->gatherSuccessorObservationInformationAtCurrentState(action, gatheredSuccessorObservations); + for (auto const& obsInfo : gatheredSuccessorObservations) { + if (refinedObservations.get(obsInfo.first)) { + ValueType obsRating = rateObservation(obsInfo.second); + stateActionRating = std::min(stateActionRating, obsRating); } - beliefsToBeExpanded.push_back(nextBeliefId); - refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); - transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; - ++nextBeliefId; - ++nextStateId; - } else { - transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; } + // Only refine if this rating is below the doubled refinementAggressiveness + expandStateAction = stateActionRating < storm::utility::convertNumber(2.0) * (*refinementAggressiveness); } - } - } - if (!transitionInActionBelief.empty()) { - transitionsStateActionPair[stateActionPair] = transitionInActionBelief; - } - } - - std::set stoppedExplorationStateSet; - - // Expand newly added beliefs - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - bool isTarget = refinementComponents->beliefIsTarget[currId]; - - if (initialBoundMapsSet && - cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(options.explorationThreshold))) { - STORM_PRINT("Stop Exploration in State " << refinementComponents->overApproxBeliefStateMap.left.at(currId) << " with Value " << weightedSumOverMap[currId] - << std::endl) - transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = {{1, weightedSumOverMap[currId]}, - {0, storm::utility::one() - - weightedSumOverMap[currId]}}; - stoppedExplorationStateSet.insert(refinementComponents->overApproxBeliefStateMap.left.at(currId)); - continue; - } - - if (isTarget) { - // Depending on whether we compute rewards, we select the right initial result - // MDP stuff - transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = - {{refinementComponents->overApproxBeliefStateMap.left.at(currId), storm::utility::one()}}; - } else { - uint64_t representativeState = pomdp.getStatesWithObservation(refinementComponents->beliefList[currId].observation).front(); - uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); - std::vector actionRewardsInState(numChoices); - - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], action); - std::map transitionInActionBelief; - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, - targetObservations, refinementComponents->beliefList[currId], action, observation, - nextBeliefId); - nextBeliefId = refinementComponents->beliefList.size(); - 
//Triangulate here and put the possibly resulting belief in the grid - std::vector> subSimplex; - std::vector lambdas; - - if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, - observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], - pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - if (options.cacheSubsimplices) { - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; + if (expandStateAction) { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector); + for (auto const& successor : successorGridPoints) { + bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first) : overApproximation->computeUpperValueBoundAtBelief(successor.first)); } } - - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]); - if (approxId == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextBeliefId, observation, subSimplex[j]}; - refinementComponents->beliefList.push_back(gridBelief); - refinementComponents->beliefGrid.push_back(gridBelief); - refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - // compute overapproximate value using MDP result map - if (initialBoundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(initialOverMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(initialUnderMap[i]); - } - weightedSumOverMap[nextBeliefId] = tempWeightedSumOver; - weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder; - } - beliefsToBeExpanded.push_back(nextBeliefId); - refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); - transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; - ++nextBeliefId; - ++nextStateId; - } else { - transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; - } + if (stopExploration) { + if (computeRewards) { + overApproximation->addTransitionsToExtraStates(action, truncationProbability); + } else { + overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); } } - } - if (!transitionInActionBelief.empty()) { - 
transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), action)] = transitionInActionBelief; + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + overApproximation->computeRewardAtCurrentState(action, truncationValueBound); + } + } else { + // Do not refine here + overApproximation->restoreOldBehaviorAtCurrentState(action); } } } @@ -606,173 +398,85 @@ namespace storm { break; } } - - statistics.overApproximationStates = nextStateId; + // TODO: Drop unreachable states (sometimes?) + statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildTime.stop(); - // Return the result from the old refinement step - return refinementComponents; - } - storm::models::sparse::StateLabeling mdpLabeling(nextStateId); - mdpLabeling.addLabel("init"); - mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)); - mdpLabeling.addLabelToState("target", 1); - uint_fast64_t currentRow = 0; - uint_fast64_t currentRowGroup = 0; - storm::storage::SparseMatrixBuilder smb(0, nextStateId, 0, false, true); - auto oldTransitionMatrix = refinementComponents->overApproxModelPtr->getTransitionMatrix(); - smb.newRowGroup(currentRow); - smb.addNextValue(currentRow, 0, storm::utility::one()); - ++currentRow; - ++currentRowGroup; - smb.newRowGroup(currentRow); - smb.addNextValue(currentRow, 1, storm::utility::one()); - ++currentRow; - ++currentRowGroup; - for (uint64_t state = 2; state < nextStateId; ++state) { - smb.newRowGroup(currentRow); - //STORM_PRINT("Loop State: " << state << std::endl) - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(refinementComponents->beliefList[refinementComponents->overApproxBeliefStateMap.right.at(state)].observation).front()); - bool isTarget = refinementComponents->beliefIsTarget[refinementComponents->overApproxBeliefStateMap.right.at(state)]; - for (uint64_t action = 0; action < numChoices; ++action) { - if (transitionsStateActionPair.find(std::make_pair(state, action)) == transitionsStateActionPair.end()) { - for (auto const &entry : oldTransitionMatrix.getRow(state, action)) { - smb.addNextValue(currentRow, entry.getColumn(), entry.getValue()); - } - } else { - for (auto const &iter : transitionsStateActionPair[std::make_pair(state, action)]) { - smb.addNextValue(currentRow, iter.first, iter.second); - } - } - ++currentRow; - if (isTarget) { - // If the state is a target, we only have one action, thus we add the target label and stop the iteration - mdpLabeling.addLabelToState("target", state); - break; - } - if (stoppedExplorationStateSet.find(state) != stoppedExplorationStateSet.end()) { - break; - } - } - ++currentRowGroup; - } - storm::storage::sparse::ModelComponents modelComponents(smb.build(), mdpLabeling); - storm::models::sparse::Mdp overApproxMdp(modelComponents); - if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { - auto currentBelief = refinementComponents->beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < 
overApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); - } - } - overApproxMdp.addRewardModel("std", mdpRewardModel); - overApproxMdp.restrictRewardModels(std::set({"std"})); + return; } - overApproxMdp.printModelInformationToStream(std::cout); - statistics.overApproximationBuildTime.stop(); - STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); - auto model = std::make_shared>(overApproxMdp); - auto modelPtr = std::static_pointer_cast>(model); - std::string propertyString = computeRewards ? "R" : "P"; - propertyString += min ? "min" : "max"; - propertyString += "=? [F \"target\"]"; - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - auto task = storm::api::createTask(property, false); + overApproximation->finishExploration(); + statistics.overApproximationBuildTime.stop(); + statistics.overApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); + overApproximation->computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.overApproximationCheckTime.stop(); - if (storm::utility::resources::isTerminate() && !res) { - return refinementComponents; // Return the result from the previous iteration - } - STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << std::endl) - STORM_LOG_ASSERT(res, "Result not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); - auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto overApprox = overApproxResultMap[refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)]; - - //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, - refinementComponents->initialBeliefId, min, computeRewards, maxUaModelSize); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - if (storm::utility::resources::isTerminate() && !underApproxComponents) { - return std::make_unique>( - RefinementComponents{modelPtr, overApprox, refinementComponents->underApproxValue, overApproxResultMap, {}, refinementComponents->beliefList, refinementComponents->beliefGrid, refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, {}, refinementComponents->initialBeliefId}); - } - STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); - - return std::make_shared>( - RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, - underApproxComponents->underApproxMap, refinementComponents->beliefList, refinementComponents->beliefGrid, - refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, - underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId}); - } - */ } template - 
std::shared_ptr::ExplorerType> ApproximatePOMDPModelchecker::computeUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr& beliefManager) { + void ApproximatePOMDPModelchecker::buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { statistics.underApproximationBuildTime.start(); - storm::builder::BeliefMdpExplorer> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds); - if (computeRewards) { - explorer.startNewExploration(storm::utility::zero()); + if (!underApproximation->hasComputedValues()) { + // Build a new under approximation + if (computeRewards) { + underApproximation->startNewExploration(storm::utility::zero()); + } else { + underApproximation->startNewExploration(storm::utility::one(), storm::utility::zero()); + } } else { - explorer.startNewExploration(storm::utility::one(), storm::utility::zero()); + // Restart the building process + underApproximation->restartExploration(); } - // Expand the beliefs to generate the grid on-the-fly - if (options.explorationThreshold > storm::utility::zero()) { - STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) - } - while (explorer.hasUnexploredState()) { - uint64_t currId = explorer.exploreNextState(); + // Expand the beliefs + while (underApproximation->hasUnexploredState()) { + uint64_t currId = underApproximation->exploreNextState(); uint32_t currObservation = beliefManager->getBeliefObservation(currId); if (targetObservations.count(currObservation) != 0) { - explorer.setCurrentStateIsTarget(); - explorer.addSelfloopTransition(); + underApproximation->setCurrentStateIsTarget(); + underApproximation->addSelfloopTransition(); } else { bool stopExploration = false; - if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { - stopExploration = true; - explorer.setCurrentStateIsTruncated(); - } else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) { - stopExploration = true; - explorer.setCurrentStateIsTruncated(); + if (!underApproximation->currentStateHasOldBehavior()) { + if (storm::utility::abs(underApproximation->getUpperValueBoundAtCurrentState() - underApproximation->getLowerValueBoundAtCurrentState()) < options.explorationThreshold) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); + } else if (underApproximation->getCurrentNumberOfMdpStates() >= maxStateCount) { + stopExploration = true; + underApproximation->setCurrentStateIsTruncated(); + } } for (uint64 action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) { - ValueType truncationProbability = storm::utility::zero(); - ValueType truncationValueBound = storm::utility::zero(); - auto successors = beliefManager->expand(currId, action); - for (auto const& successor : successors) { - bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration); - if (!added) { - STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); - // We did not explore this successor state. 
Get a bound on the "missing" value - truncationProbability += successor.second; - truncationValueBound += successor.second * (min ? explorer.computeUpperValueBoundAtBelief(successor.first) : explorer.computeLowerValueBoundAtBelief(successor.first)); + // Always restore old behavior if available + if (underApproximation->currentStateHasOldBehavior()) { + underApproximation->restoreOldBehaviorAtCurrentState(action); + } else { + ValueType truncationProbability = storm::utility::zero(); + ValueType truncationValueBound = storm::utility::zero(); + auto successors = beliefManager->expand(currId, action); + for (auto const& successor : successors) { + bool added = underApproximation->addTransitionToBelief(action, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first) : underApproximation->computeLowerValueBoundAtBelief(successor.first)); + } + } + if (stopExploration) { + if (computeRewards) { + underApproximation->addTransitionsToExtraStates(action, truncationProbability); + } else { + underApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + } } - } - if (stopExploration) { if (computeRewards) { - explorer.addTransitionsToExtraStates(action, truncationProbability); - } else { - explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound); + // The truncationValueBound will be added on top of the reward introduced by the current belief state. + underApproximation->computeRewardAtCurrentState(action, truncationValueBound); } } - if (computeRewards) { - // The truncationValueBound will be added on top of the reward introduced by the current belief state. - explorer.computeRewardAtCurrentState(action, truncationValueBound); - } } } if (storm::utility::resources::isTerminate()) { @@ -780,25 +484,19 @@ namespace storm { break; } } - statistics.underApproximationStates = explorer.getCurrentNumberOfMdpStates(); + statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates(); if (storm::utility::resources::isTerminate()) { statistics.underApproximationBuildTime.stop(); - return nullptr; + return; } - explorer.finishExploration(); + underApproximation->finishExploration(); statistics.underApproximationBuildTime.stop(); statistics.underApproximationCheckTime.start(); - explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); + underApproximation->computeValuesOfExploredMdp(min ? 
storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); statistics.underApproximationCheckTime.stop(); - return std::make_shared(std::move(explorer)); - } - - template - void ApproximatePOMDPModelchecker::refineUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation) { - // TODO } template class ApproximatePOMDPModelchecker>; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 0d59ac31a..7fbd2ab5e 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -18,32 +18,6 @@ namespace storm { namespace modelchecker { typedef boost::bimap bsmap_type; - /** - * Struct containing information which is supposed to be persistent over multiple refinement steps - * - */ - template> - struct RefinementComponents { - std::shared_ptr> overApproxModelPtr; - ValueType overApproxValue; - ValueType underApproxValue; - std::map overApproxMap; - std::map underApproxMap; - std::vector> beliefList; - std::vector> beliefGrid; - std::vector beliefIsTarget; - bsmap_type overApproxBeliefStateMap; - bsmap_type underApproxBeliefStateMap; - uint64_t initialBeliefId; - }; - - template> - struct UnderApproxComponents { - ValueType underApproxValue; - std::map underApproxMap; - bsmap_type underApproxBeliefStateMap; - }; - template class ApproximatePOMDPModelchecker { public: @@ -103,121 +77,16 @@ namespace storm { /** * Builds and checks an MDP that over-approximates the POMDP behavior, i.e. provides an upper bound for maximizing and a lower bound for minimizing properties */ - std::shared_ptr computeOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, std::vector& observationResolutionVector, std::shared_ptr& beliefManager); - - void refineOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); + void buildOverApproximation(std::set const &targetObservations, bool min, bool computeRewards, bool refine, ValueType* refinementAggressiveness, std::vector& observationResolutionVector, std::shared_ptr& beliefManager, std::shared_ptr& overApproximation); /** * Builds and checks an MDP that under-approximates the POMDP behavior, i.e. provides a lower bound for maximizing and an upper bound for minimizing properties */ - std::shared_ptr computeUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr& beliefManager); - - void refineUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); - - -#ifdef REMOVE_THIS - /** - * Helper to compute an underapproximation of the reachability property. - * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. 
- * - * @param beliefList vector containing already generated beliefs - * @param beliefIsTarget vector containinf for each belief in beliefList true if the belief is a target - * @param targetObservations set of target observations - * @param initialBeliefId Id of the belief corresponding to the POMDP's initial state - * @param min true if minimum value is to be computed - * @param computeReward true if rewards are to be computed - * @param maxModelSize number of states up until which the belief support should be unrolled - * @return struct containing the components generated during the under approximation - */ - std::unique_ptr> computeUnderapproximation(std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - uint64_t initialBeliefId, bool min, bool computeReward, - uint64_t maxModelSize); - std::unique_ptr> computeUnderapproximation(std::shared_ptr>> beliefManager, - std::set const &targetObservations, bool min, bool computeReward, - uint64_t maxModelSize, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds); - - /** - * Constructs the initial belief for the given POMDP - * - * @param pomdp the POMDP - * @param id the id the initial belief is given - * @return a belief representing the initial belief - */ - storm::pomdp::Belief - getInitialBelief(uint64_t id); - - - /** - * Subroutine to compute the subsimplex a given belief is contained in and the corresponding lambda values necessary for the Freudenthal triangulation - * - * @param probabilities the probability distribution of the belief - * @param gridResolution the resolution used for the belief - * @param nrStates number of states in the POMDP - * @return a pair containing: 1) the subsimplices 2) the lambda values - */ - std::pair>, std::vector> - computeSubSimplexAndLambdas(std::map &probabilities, uint64_t gridResolution, uint64_t nrStates); + void buildUnderApproximation(std::set const &targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr& beliefManager, std::shared_ptr& underApproximation); - - /** - * Helper method to get the probabilities to be in a state with each observation after performing an action - * - * @param belief the belief in which the action is performed - * @param actionIndex the index of the action to be performed - * @return mapping from each observation to the probability to be in a state with that observation after performing the action - */ - std::map computeObservationProbabilitiesAfterAction(storm::pomdp::Belief &belief, - uint64_t actionIndex); - - /** - * Helper method to get the id of the next belief that results from a belief by performing an action and observing an observation. 
- * If the belief does not exist yet, it is created and added to the list of all beliefs - * - * @param beliefList data structure to store all generated beliefs - * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief - * @param targetObservations set of target observations - * @param belief the starting belief - * @param actionIndex the index of the action to be performed - * @param observation the observation after the action was performed - * @return the resulting belief (observation and distribution) - */ - uint64_t getBeliefAfterActionAndObservation(std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - storm::pomdp::Belief &belief, - uint64_t actionIndex, uint32_t observation, uint64_t id); - - /** - * Helper to get the id of a Belief stored in a given vector structure - * - * @param grid the vector on which the lookup is performed - * @param observation the observation of the belief - * @param probabilities the probability distribution over the POMDP states of the Belief - * @return if the belief was found in the vector, the belief's ID, otherwise -1 - */ - uint64_t getBeliefIdInVector(std::vector> const &grid, uint32_t observation, - std::map &probabilities); - - /** - * Helper method to build the transition matrix from a data structure containing transations - * - * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability) - * @return sparseMatrix representing the transitions - */ - storm::storage::SparseMatrix buildTransitionMatrix(std::vector>> &transitions); - - /** - * Get the reward for performing an action in a given belief - * - * @param action the index of the action to be performed - * @param belief the belief in which the action is performed - * @return the reward earned by performing the action in the belief - */ - ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief const& belief); - ValueType getRewardAfterAction(uint64_t action, std::map const& belief); -#endif //REMOVE_THIS + ValueType rateObservation(typename ExplorerType::SuccessorObservationInformation const& info); + + std::vector getObservationRatings(std::shared_ptr const& overApproximation); struct Statistics { Statistics();