#include "ApproximatePOMDPModelchecker.h" #include #include #include "storm-pomdp/analysis/FormulaInformation.h" #include "storm/utility/ConstantsComparator.h" #include "storm/utility/NumberTraits.h" #include "storm/utility/graph.h" #include "storm/logic/Formulas.h" #include "storm/models/sparse/Dtmc.h" #include "storm/models/sparse/StandardRewardModel.h" #include "storm/modelchecker/prctl/SparseDtmcPrctlModelChecker.h" #include "storm/utility/vector.h" #include "storm/api/properties.h" #include "storm/api/export.h" #include "storm-pomdp/builder/BeliefMdpExplorer.h" #include "storm-pomdp/modelchecker/TrivialPomdpValueBoundsModelChecker.h" #include "storm/utility/macros.h" #include "storm/utility/SignalHandler.h" #include "storm/exceptions/NotSupportedException.h" namespace storm { namespace pomdp { namespace modelchecker { template ApproximatePOMDPModelchecker::Options::Options() { initialGridResolution = 10; explorationThreshold = storm::utility::zero(); doRefinement = true; refinementPrecision = storm::utility::convertNumber(1e-4); numericPrecision = storm::NumberTraits::IsExact ? storm::utility::zero() : storm::utility::convertNumber(1e-9); cacheSubsimplices = false; } template ApproximatePOMDPModelchecker::Result::Result(ValueType lower, ValueType upper) : lowerBound(lower), upperBound(upper) { // Intentionally left empty } template typename ApproximatePOMDPModelchecker::ValueType ApproximatePOMDPModelchecker::Result::diff(bool relative) const { ValueType diff = upperBound - lowerBound; if (diff < storm::utility::zero()) { STORM_LOG_WARN_COND(diff >= 1e-6, "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << "."); diff = storm::utility::zero(); } if (relative && !storm::utility::isZero(upperBound)) { diff /= upperBound; } return diff; } template ApproximatePOMDPModelchecker::Statistics::Statistics() : overApproximationBuildAborted(false), underApproximationBuildAborted(false), aborted(false) { // intentionally left empty; } template ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(PomdpModelType const& pomdp, Options options) : pomdp(pomdp), options(options) { cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(this->options.numericPrecision), false); } template typename ApproximatePOMDPModelchecker::Result ApproximatePOMDPModelchecker::check(storm::logic::Formula const& formula) { // Reset all collected statistics statistics = Statistics(); // Extract the relevant information from the formula auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp, formula); // Compute some initial bounds on the values for each state of the pomdp auto initialPomdpValueBounds = TrivialPomdpValueBoundsModelChecker>(pomdp).getValueBounds(formula, formulaInfo); Result result(initialPomdpValueBounds.lower[pomdp.getInitialStates().getNextSetIndex(0)], initialPomdpValueBounds.upper[pomdp.getInitialStates().getNextSetIndex(0)]); boost::optional rewardModelName; if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) { // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing. STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. 
This is currently not supported"); if (formulaInfo.isNonNestedReachabilityProbability()) { if (!formulaInfo.getSinkStates().empty()) { auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp.getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states); reachableFromSinkStates &= ~formulaInfo.getSinkStates().states; STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported"); } } else { // Expected reward formula! rewardModelName = formulaInfo.getRewardModelName(); } } else { STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); } if (options.doRefinement) { refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); } else { computeReachabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), rewardModelName, initialPomdpValueBounds.lower, initialPomdpValueBounds.upper, result); } if (storm::utility::resources::isTerminate()) { statistics.aborted = true; } return result; } template void ApproximatePOMDPModelchecker::printStatisticsToStream(std::ostream& stream) const { stream << "##### Grid Approximation Statistics ######" << std::endl; stream << "# Input model: " << std::endl; pomdp.printModelInformationToStream(stream); stream << "# Max. Number of states with same observation: " << pomdp.getMaxNrStatesWithSameObservation() << std::endl; if (statistics.aborted) { stream << "# Computation aborted early" << std::endl; } // Refinement information: if (statistics.refinementSteps) { stream << "# Number of refinement steps: " << statistics.refinementSteps.get() << std::endl; } // The overapproximation MDP: if (statistics.overApproximationStates) { stream << "# Number of states in the "; if (options.doRefinement) { stream << "final "; } stream << "grid MDP for the over-approximation: "; if (statistics.overApproximationBuildAborted) { stream << ">="; } stream << statistics.overApproximationStates.get() << std::endl; stream << "# Time spend for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << std::endl; stream << "# Time spend for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << std::endl; } // The underapproximation MDP: if (statistics.underApproximationStates) { stream << "# Number of states in the "; if (options.doRefinement) { stream << "final "; } stream << "grid MDP for the under-approximation: "; if (statistics.underApproximationBuildAborted) { stream << ">="; } stream << statistics.underApproximationStates.get() << std::endl; stream << "# Time spend for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << std::endl; stream << "# Time spend for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << std::endl; } stream << "##########################################" << std::endl; } template void ApproximatePOMDPModelchecker::computeReachabilityOTF(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { if (options.explorationThreshold > storm::utility::zero()) { STORM_PRINT("Exploration threshold: " << options.explorationThreshold << 
std::endl) } uint64_t underApproxSizeThreshold = 0; { // Overapproximation std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); auto manager = std::make_shared(pomdp, options.numericPrecision); if (rewardModelName) { manager->setRewardModel(rewardModelName); } auto approx = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, manager); if (approx) { STORM_PRINT_AND_LOG("Explored and checked Over-Approximation MDP:\n"); approx->getExploredMdp()->printModelInformationToStream(std::cout); ValueType& resultValue = min ? result.lowerBound : result.upperBound; resultValue = approx->getComputedValueAtInitialState(); underApproxSizeThreshold = approx->getExploredMdp()->getNumberOfStates(); } } { // Underapproximation (Uses a fresh Belief manager) auto manager = std::make_shared(pomdp, options.numericPrecision); if (rewardModelName) { manager->setRewardModel(rewardModelName); } auto approx = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, manager); if (approx) { STORM_PRINT_AND_LOG("Explored and checked Under-Approximation MDP:\n"); approx->getExploredMdp()->printModelInformationToStream(std::cout); ValueType& resultValue = min ? result.upperBound : result.lowerBound; resultValue = approx->getComputedValueAtInitialState(); } } } template void ApproximatePOMDPModelchecker::refineReachability(std::set const &targetObservations, bool min, boost::optional rewardModelName, std::vector const& lowerPomdpValueBounds, std::vector const& upperPomdpValueBounds, Result& result) { // Set up exploration data std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); auto beliefManager = std::make_shared(pomdp, options.numericPrecision); if (rewardModelName) { beliefManager->setRewardModel(rewardModelName); } // OverApproximaion auto overApproximation = computeOverApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, observationResolutionVector, beliefManager); if (!overApproximation) { return; } ValueType& overApproxValue = min ? result.lowerBound : result.upperBound; overApproxValue = overApproximation->getComputedValueAtInitialState(); // UnderApproximation TODO: use same belief manager?) uint64_t underApproxSizeThreshold = overApproximation->getExploredMdp()->getNumberOfStates(); auto underApproximation = computeUnderApproximation(targetObservations, min, rewardModelName.is_initialized(), lowerPomdpValueBounds, upperPomdpValueBounds, underApproxSizeThreshold, beliefManager); if (!underApproximation) { return; } ValueType& underApproxValue = min ? 
result.upperBound : result.lowerBound; underApproxValue = underApproximation->getComputedValueAtInitialState(); // ValueType lastMinScore = storm::utility::infinity(); // Start refinement statistics.refinementSteps = 0; while (result.diff() > options.refinementPrecision) { if (storm::utility::resources::isTerminate()) { break; } // TODO the actual refinement /* // choose which observation(s) to refine std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); std::vector beliefCount(pomdp.getNrObservations(), 0); bsmap_type::right_map::const_iterator underApproxStateBeliefIter = res->underApproxBeliefStateMap.right.begin(); while (underApproxStateBeliefIter != res->underApproxBeliefStateMap.right.end()) { auto currentBelief = res->beliefList[underApproxStateBeliefIter->second]; beliefCount[currentBelief.observation] += 1; bsmap_type::left_const_iterator overApproxBeliefStateIter = res->overApproxBeliefStateMap.left.find(underApproxStateBeliefIter->second); if (overApproxBeliefStateIter != res->overApproxBeliefStateMap.left.end()) { // If there is an over-approximate value for the belief, use it auto diff = res->overApproxMap[overApproxBeliefStateIter->second] - res->underApproxMap[underApproxStateBeliefIter->first]; obsAccumulator[currentBelief.observation] += diff; } else { //otherwise, we approximate a value TODO this is critical, we have to think about it auto overApproxValue = storm::utility::zero(); auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation], pomdp.getNumberOfStates()); auto subSimplex = temp.first; auto lambdas = temp.second; for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { uint64_t approxId = getBeliefIdInVector(res->beliefList, currentBelief.observation, subSimplex[j]); bsmap_type::left_const_iterator approxIter = res->overApproxBeliefStateMap.left.find(approxId); if (approxIter != res->overApproxBeliefStateMap.left.end()) { overApproxValue += lambdas[j] * res->overApproxMap[approxIter->second]; } else { overApproxValue += lambdas[j]; } } } obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[underApproxStateBeliefIter->first]; } ++underApproxStateBeliefIter; } //for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { // obsAccumulator[i] /= storm::utility::convertNumber(beliefCount[i]); //} changedObservations.clear(); //TODO think about some other scoring methods auto maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end()); //if (cc.isEqual(maxAvgDifference, lastMinScore) || cc.isLess(lastMinScore, maxAvgDifference)) { lastMinScore = maxAvgDifference; auto maxRes = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()); STORM_PRINT("Set all to " << maxRes + 1 << std::endl) for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { observationResolutionVector[i] = maxRes + 1; changedObservations.insert(i); } //} else { // lastMinScore = std::min(maxAvgDifference, lastMinScore); // STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) // STORM_PRINT("Last Min Score: " << lastMinScore << std::endl) // //STORM_PRINT("Obs(beliefCount): Score " << std::endl << "-------------------------------------" << std::endl) // for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { //STORM_PRINT(i << "(" << beliefCount[i] << "): " << obsAccumulator[i]) // if (cc.isEqual(obsAccumulator[i], maxAvgDifference)) { //STORM_PRINT(" *** ") // 
observationResolutionVector[i] += 1; // changedObservations.insert(i); // } //STORM_PRINT(std::endl) // } //} if (underApproxModelSize < std::numeric_limits::max() - 101) { underApproxModelSize += 100; } STORM_PRINT( "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl) res = computeRefinementStep(targetObservations, min, observationResolutionVector, computeRewards, res, changedObservations, initialOverApproxMap, initialUnderApproxMap, underApproxModelSize); //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot"); STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) || cc.isEqual(res->underApproxValue, res->overApproxValue), "The value for the under-approximation is larger than the value for the over-approximation.");
*/
        ++statistics.refinementSteps.get();
    }
}

template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeOverApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager) {
    statistics.overApproximationBuildTime.start();
    storm::builder::BeliefMdpExplorer<storm::models::sparse::Mdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
    if (computeRewards) {
        explorer.startNewExploration(storm::utility::zero<ValueType>());
    } else {
        explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
    }

    // Expand the beliefs to generate the grid on-the-fly
    while (explorer.hasUnexploredState()) {
        uint64_t currId = explorer.exploreNextState();
        uint32_t currObservation = beliefManager->getBeliefObservation(currId);
        if (targetObservations.count(currObservation) != 0) {
            explorer.setCurrentStateIsTarget();
            explorer.addSelfloopTransition();
        } else {
            bool stopExploration = false;
            if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
                stopExploration = true;
                explorer.setCurrentStateIsTruncated();
            }
            for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                ValueType truncationProbability = storm::utility::zero<ValueType>();
                ValueType truncationValueBound = storm::utility::zero<ValueType>();
                auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                for (auto const& successor : successorGridPoints) {
                    bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
                    if (!added) {
                        STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
                        // We did not explore this successor state. Get a bound on the "missing" value
                        truncationProbability += successor.second;
                        truncationValueBound += successor.second * (min ? explorer.computeLowerValueBoundAtBelief(successor.first) : explorer.computeUpperValueBoundAtBelief(successor.first));
                    }
                }
                if (stopExploration) {
                    if (computeRewards) {
                        explorer.addTransitionsToExtraStates(action, truncationProbability);
                    } else {
                        explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
                    }
                }
                if (computeRewards) {
                    // The truncationValueBound will be added on top of the reward introduced by the current belief state.
                    explorer.computeRewardAtCurrentState(action, truncationValueBound);
                }
            }
        }
        if (storm::utility::resources::isTerminate()) {
            statistics.overApproximationBuildAborted = true;
            break;
        }
    }
    statistics.overApproximationStates = explorer.getCurrentNumberOfMdpStates();
    if (storm::utility::resources::isTerminate()) {
        statistics.overApproximationBuildTime.stop();
        return nullptr;
    }
    explorer.finishExploration();
    statistics.overApproximationBuildTime.stop();

    statistics.overApproximationCheckTime.start();
    explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
    statistics.overApproximationCheckTime.stop();
    return std::make_shared<ExplorerType>(std::move(explorer));
}

template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineOverApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, std::vector<uint64_t>& observationResolutionVector, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
    /*TODO: template std::shared_ptr> ApproximatePOMDPModelchecker::computeRefinementStep(std::set const &targetObservations, bool min, std::vector &observationResolutionVector, bool computeRewards, std::shared_ptr> refinementComponents, std::set changedObservations, boost::optional> overApproximationMap, boost::optional> underApproximationMap, uint64_t maxUaModelSize) { bool initialBoundMapsSet = overApproximationMap && underApproximationMap; std::map initialOverMap; std::map initialUnderMap; if (initialBoundMapsSet) { initialOverMap = overApproximationMap.value(); initialUnderMap = underApproximationMap.value(); } // Note that a persistent cache is not supported by the current data structure. The resolution for the given belief also has to be stored somewhere to cache effectively std::map>> subSimplexCache; std::map> lambdaCache; // Map to save the weighted values resulting from the initial preprocessing for newly added beliefs / indices in beliefSpace std::map weightedSumOverMap; std::map weightedSumUnderMap; statistics.overApproximationBuildTime.start(); uint64_t nextBeliefId = refinementComponents->beliefList.size(); uint64_t nextStateId = refinementComponents->overApproxModelPtr->getNumberOfStates(); std::set relevantStates; // The MDP states where the observation has changed for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { auto currentBelief = refinementComponents->beliefList[iter.first]; if (changedObservations.find(currentBelief.observation) != changedObservations.end()) { relevantStates.insert(iter.second); } } std::set> statesAndActionsToCheck; // The predecessors of states where the observation has changed for (uint64_t state = 0; state < refinementComponents->overApproxModelPtr->getNumberOfStates(); ++state) { for (uint_fast64_t row = 0; row < refinementComponents->overApproxModelPtr->getTransitionMatrix().getRowGroupSize(state); ++row) { for (typename storm::storage::SparseMatrix::const_iterator itEntry = refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow( state, row).begin(); itEntry != refinementComponents->overApproxModelPtr->getTransitionMatrix().getRow(state, row).end(); ++itEntry) { if (relevantStates.find(itEntry->getColumn()) != relevantStates.end()) { statesAndActionsToCheck.insert(std::make_pair(state, row)); break; } } } } std::deque beliefsToBeExpanded; std::map, std::map> transitionsStateActionPair; for (auto const &stateActionPair : statesAndActionsToCheck) { auto currId = refinementComponents->overApproxBeliefStateMap.right.at(stateActionPair.first); auto action
= stateActionPair.second; std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], action); std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { // Expand and triangulate the successor uint32_t observation = iter->first; uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); nextBeliefId = refinementComponents->beliefList.size(); //Triangulate here and put the possibly resulting belief in the grid std::vector> subSimplex; std::vector lambdas; //TODO add caching if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { subSimplex = subSimplexCache[idNextBelief]; lambdas = lambdaCache[idNextBelief]; } else { auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], pomdp.getNumberOfStates()); subSimplex = temp.first; lambdas = temp.second; if (options.cacheSubsimplices) { subSimplexCache[idNextBelief] = subSimplex; lambdaCache[idNextBelief] = lambdas; } } for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]); if (approxId == uint64_t(-1)) { // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list storm::pomdp::Belief gridBelief = {nextBeliefId, observation, subSimplex[j]}; refinementComponents->beliefList.push_back(gridBelief); refinementComponents->beliefGrid.push_back(gridBelief); refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); // compute overapproximate value using MDP result map if (initialBoundMapsSet) { auto tempWeightedSumOver = storm::utility::zero(); auto tempWeightedSumUnder = storm::utility::zero(); for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(initialOverMap[i]); tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(initialUnderMap[i]); } weightedSumOverMap[nextBeliefId] = tempWeightedSumOver; weightedSumUnderMap[nextBeliefId] = tempWeightedSumUnder; } beliefsToBeExpanded.push_back(nextBeliefId); refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; ++nextBeliefId; ++nextStateId; } else { transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; } } } } if (!transitionInActionBelief.empty()) { transitionsStateActionPair[stateActionPair] = transitionInActionBelief; } } std::set stoppedExplorationStateSet; // Expand newly added beliefs while (!beliefsToBeExpanded.empty()) { uint64_t currId = beliefsToBeExpanded.front(); beliefsToBeExpanded.pop_front(); bool isTarget = refinementComponents->beliefIsTarget[currId]; if (initialBoundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], storm::utility::convertNumber(options.explorationThreshold))) { STORM_PRINT("Stop Exploration in State " << 
refinementComponents->overApproxBeliefStateMap.left.at(currId) << " with Value " << weightedSumOverMap[currId] << std::endl) transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = {{1, weightedSumOverMap[currId]}, {0, storm::utility::one() - weightedSumOverMap[currId]}}; stoppedExplorationStateSet.insert(refinementComponents->overApproxBeliefStateMap.left.at(currId)); continue; } if (isTarget) { // Depending on whether we compute rewards, we select the right initial result // MDP stuff transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), 0)] = {{refinementComponents->overApproxBeliefStateMap.left.at(currId), storm::utility::one()}}; } else { uint64_t representativeState = pomdp.getStatesWithObservation(refinementComponents->beliefList[currId].observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); std::vector actionRewardsInState(numChoices); for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(refinementComponents->beliefList[currId], action); std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up uint64_t idNextBelief = getBeliefAfterActionAndObservation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->beliefList[currId], action, observation, nextBeliefId); nextBeliefId = refinementComponents->beliefList.size(); //Triangulate here and put the possibly resulting belief in the grid std::vector> subSimplex; std::vector lambdas; if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { subSimplex = subSimplexCache[idNextBelief]; lambdas = lambdaCache[idNextBelief]; } else { auto temp = computeSubSimplexAndLambdas(refinementComponents->beliefList[idNextBelief].probabilities, observationResolutionVector[refinementComponents->beliefList[idNextBelief].observation], pomdp.getNumberOfStates()); subSimplex = temp.first; lambdas = temp.second; if (options.cacheSubsimplices) { subSimplexCache[idNextBelief] = subSimplex; lambdaCache[idNextBelief] = lambdas; } } for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { auto approxId = getBeliefIdInVector(refinementComponents->beliefGrid, observation, subSimplex[j]); if (approxId == uint64_t(-1)) { // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list storm::pomdp::Belief gridBelief = {nextBeliefId, observation, subSimplex[j]}; refinementComponents->beliefList.push_back(gridBelief); refinementComponents->beliefGrid.push_back(gridBelief); refinementComponents->beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); // compute overapproximate value using MDP result map if (initialBoundMapsSet) { auto tempWeightedSumOver = storm::utility::zero(); auto tempWeightedSumUnder = storm::utility::zero(); for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(initialOverMap[i]); tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(initialUnderMap[i]); } weightedSumOverMap[nextBeliefId] = tempWeightedSumOver; weightedSumUnderMap[nextBeliefId] = 
tempWeightedSumUnder; } beliefsToBeExpanded.push_back(nextBeliefId); refinementComponents->overApproxBeliefStateMap.insert(bsmap_type::value_type(nextBeliefId, nextStateId)); transitionInActionBelief[nextStateId] = iter->second * lambdas[j]; ++nextBeliefId; ++nextStateId; } else { transitionInActionBelief[refinementComponents->overApproxBeliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; } } } } if (!transitionInActionBelief.empty()) { transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), action)] = transitionInActionBelief; } } } if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildAborted = true; break; } } statistics.overApproximationStates = nextStateId; if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildTime.stop(); // Return the result from the old refinement step return refinementComponents; } storm::models::sparse::StateLabeling mdpLabeling(nextStateId); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); mdpLabeling.addLabelToState("init", refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)); mdpLabeling.addLabelToState("target", 1); uint_fast64_t currentRow = 0; uint_fast64_t currentRowGroup = 0; storm::storage::SparseMatrixBuilder smb(0, nextStateId, 0, false, true); auto oldTransitionMatrix = refinementComponents->overApproxModelPtr->getTransitionMatrix(); smb.newRowGroup(currentRow); smb.addNextValue(currentRow, 0, storm::utility::one()); ++currentRow; ++currentRowGroup; smb.newRowGroup(currentRow); smb.addNextValue(currentRow, 1, storm::utility::one()); ++currentRow; ++currentRowGroup; for (uint64_t state = 2; state < nextStateId; ++state) { smb.newRowGroup(currentRow); //STORM_PRINT("Loop State: " << state << std::endl) uint64_t numChoices = pomdp.getNumberOfChoices( pomdp.getStatesWithObservation(refinementComponents->beliefList[refinementComponents->overApproxBeliefStateMap.right.at(state)].observation).front()); bool isTarget = refinementComponents->beliefIsTarget[refinementComponents->overApproxBeliefStateMap.right.at(state)]; for (uint64_t action = 0; action < numChoices; ++action) { if (transitionsStateActionPair.find(std::make_pair(state, action)) == transitionsStateActionPair.end()) { for (auto const &entry : oldTransitionMatrix.getRow(state, action)) { smb.addNextValue(currentRow, entry.getColumn(), entry.getValue()); } } else { for (auto const &iter : transitionsStateActionPair[std::make_pair(state, action)]) { smb.addNextValue(currentRow, iter.first, iter.second); } } ++currentRow; if (isTarget) { // If the state is a target, we only have one action, thus we add the target label and stop the iteration mdpLabeling.addLabelToState("target", state); break; } if (stoppedExplorationStateSet.find(state) != stoppedExplorationStateSet.end()) { break; } } ++currentRowGroup; } storm::storage::sparse::ModelComponents modelComponents(smb.build(), mdpLabeling); storm::models::sparse::Mdp overApproxMdp(modelComponents); if (computeRewards) { storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) { auto currentBelief = refinementComponents->beliefList[iter.first]; auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { // Add 
the reward mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), currentBelief)); } } overApproxMdp.addRewardModel("std", mdpRewardModel); overApproxMdp.restrictRewardModels(std::set({"std"})); } overApproxMdp.printModelInformationToStream(std::cout); statistics.overApproximationBuildTime.stop(); STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); auto model = std::make_shared>(overApproxMdp); auto modelPtr = std::static_pointer_cast>(model); std::string propertyString = computeRewards ? "R" : "P"; propertyString += min ? "min" : "max"; propertyString += "=? [F \"target\"]"; std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); auto task = storm::api::createTask(property, false); statistics.overApproximationCheckTime.start(); std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); statistics.overApproximationCheckTime.stop(); if (storm::utility::resources::isTerminate() && !res) { return refinementComponents; // Return the result from the previous iteration } STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << std::endl) STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); auto overApprox = overApproxResultMap[refinementComponents->overApproxBeliefStateMap.left.at(refinementComponents->initialBeliefId)]; //auto underApprox = weightedSumUnderMap[initialBelief.id]; auto underApproxComponents = computeUnderapproximation(refinementComponents->beliefList, refinementComponents->beliefIsTarget, targetObservations, refinementComponents->initialBeliefId, min, computeRewards, maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); if (storm::utility::resources::isTerminate() && !underApproxComponents) { return std::make_unique>( RefinementComponents{modelPtr, overApprox, refinementComponents->underApproxValue, overApproxResultMap, {}, refinementComponents->beliefList, refinementComponents->beliefGrid, refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, {}, refinementComponents->initialBeliefId}); } STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); return std::make_shared>( RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, underApproxComponents->underApproxMap, refinementComponents->beliefList, refinementComponents->beliefGrid, refinementComponents->beliefIsTarget, refinementComponents->overApproxBeliefStateMap, underApproxComponents->underApproxBeliefStateMap, refinementComponents->initialBeliefId}); }
*/
}

template<typename PomdpModelType, typename BeliefValueType>
std::shared_ptr<typename ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::ExplorerType> ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::computeUnderApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, std::vector<ValueType> const& lowerPomdpValueBounds, std::vector<ValueType> const& upperPomdpValueBounds, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager) {
    statistics.underApproximationBuildTime.start();
    storm::builder::BeliefMdpExplorer<storm::models::sparse::Mdp<ValueType>> explorer(beliefManager, lowerPomdpValueBounds, upperPomdpValueBounds);
    if (computeRewards) {
        explorer.startNewExploration(storm::utility::zero<ValueType>());
    } else {
        explorer.startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
    }

    // Expand the beliefs to generate the grid on-the-fly
    if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
        STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
    }
    while (explorer.hasUnexploredState()) {
        uint64_t currId = explorer.exploreNextState();
        uint32_t currObservation = beliefManager->getBeliefObservation(currId);
        if (targetObservations.count(currObservation) != 0) {
            explorer.setCurrentStateIsTarget();
            explorer.addSelfloopTransition();
        } else {
            bool stopExploration = false;
            if (storm::utility::abs(explorer.getUpperValueBoundAtCurrentState() - explorer.getLowerValueBoundAtCurrentState()) < options.explorationThreshold) {
                stopExploration = true;
                explorer.setCurrentStateIsTruncated();
            } else if (explorer.getCurrentNumberOfMdpStates() >= maxStateCount) {
                stopExploration = true;
                explorer.setCurrentStateIsTruncated();
            }
            for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                ValueType truncationProbability = storm::utility::zero<ValueType>();
                ValueType truncationValueBound = storm::utility::zero<ValueType>();
                auto successors = beliefManager->expand(currId, action);
                for (auto const& successor : successors) {
                    bool added = explorer.addTransitionToBelief(action, successor.first, successor.second, stopExploration);
                    if (!added) {
                        STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
                        // We did not explore this successor state. Get a bound on the "missing" value
                        truncationProbability += successor.second;
                        truncationValueBound += successor.second * (min ? explorer.computeUpperValueBoundAtBelief(successor.first) : explorer.computeLowerValueBoundAtBelief(successor.first));
                    }
                }
                if (stopExploration) {
                    if (computeRewards) {
                        explorer.addTransitionsToExtraStates(action, truncationProbability);
                    } else {
                        explorer.addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
                    }
                }
                if (computeRewards) {
                    // The truncationValueBound will be added on top of the reward introduced by the current belief state.
                    explorer.computeRewardAtCurrentState(action, truncationValueBound);
                }
            }
        }
        if (storm::utility::resources::isTerminate()) {
            statistics.underApproximationBuildAborted = true;
            break;
        }
    }
    statistics.underApproximationStates = explorer.getCurrentNumberOfMdpStates();
    if (storm::utility::resources::isTerminate()) {
        statistics.underApproximationBuildTime.stop();
        return nullptr;
    }
    explorer.finishExploration();
    statistics.underApproximationBuildTime.stop();

    statistics.underApproximationCheckTime.start();
    explorer.computeValuesOfExploredMdp(min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
    statistics.underApproximationCheckTime.stop();
    return std::make_shared<ExplorerType>(std::move(explorer));
}

template<typename PomdpModelType, typename BeliefValueType>
void ApproximatePOMDPModelchecker<PomdpModelType, BeliefValueType>::refineUnderApproximation(std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, uint64_t maxStateCount, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation) {
    // TODO
}

template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>;
template class ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;

} // namespace modelchecker
} // namespace pomdp
} // namespace storm
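// Illustrative usage sketch (not part of the build): how client code might drive this checker to obtain
// lower and upper bounds on a reachability value. The concrete POMDP model type (storm::models::sparse::Pomdp<double>)
// and the property string are assumptions for illustration; only APIs that also appear above
// (Options, check, printStatisticsToStream, storm::api::parseProperties, storm::api::extractFormulasFromProperties) are used.
/*
#include <iostream>

#include "storm/api/properties.h"
#include "storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h"

void approximatePomdpExample(storm::models::sparse::Pomdp<double> const& pomdp) {
    using Checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::models::sparse::Pomdp<double>>;

    // Configure the approximation: a coarser initial grid explores fewer beliefs; refinement tightens the bounds.
    Checker::Options options;
    options.initialGridResolution = 10;
    options.doRefinement = false;

    Checker checker(pomdp, options);

    // Parse a (hypothetical) reachability property and check it. The result is an interval
    // [lowerBound, upperBound] enclosing the optimal value at the initial belief.
    auto properties = storm::api::parseProperties("Pmax=? [F \"target\"]");
    auto formulas = storm::api::extractFormulasFromProperties(properties);
    auto result = checker.check(*formulas.front());

    std::cout << "Value is in [" << result.lowerBound << ", " << result.upperBound << "]" << std::endl;
    checker.printStatisticsToStream(std::cout);
}
*/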