diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 37a29f9b1..27cbdb2bc 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -10,6 +10,7 @@
 #include "storm/modelchecker/results/CheckResult.h"
 #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h"
 #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h"
+#include "storm/modelchecker/hints/ExplicitModelCheckerHint.h"
 #include "storm/api/properties.h"
 #include "storm/api/export.h"
 #include "storm-parsers/api/storm-parsers.h"
@@ -21,7 +22,7 @@ namespace storm {
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker() {
                 precision = 0.000000001;
                 cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(precision), false);
-                useMdp = false;
+                useMdp = true;
                 maxIterations = 1000;
             }
@@ -51,6 +52,51 @@ namespace storm {
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                                                              std::set<uint32_t> const &targetObservations, bool min,
                                                                                              uint64_t gridResolution, bool computeRewards) {
+                //TODO For the prototypical implementation, I put the refinement loop here. I'll change this later on
+
+                // we define some positional scheduler for the POMDP as an experimental lower bound
+                storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
+                for (uint32_t obs = 0; obs < pomdp.getNrObservations(); ++obs) {
+                    auto obsStates = pomdp.getStatesWithObservation(obs);
+                    // select a random action for all states with the same observation
+                    uint64_t chosenAction = std::rand() % pomdp.getNumberOfChoices(obsStates.front());
+                    for (auto const &state : obsStates) {
+                        pomdpScheduler.setChoice(chosenAction, state);
+                    }
+                }
+
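+                // check the fully observable underlying MDP: its optimal values bound the POMDP value from the optimistic side,
+                // while the MDP induced by the fixed observation-based scheduler above yields values attainable in the POMDP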
[F \"goal\"]"; + std::vector propVector = storm::api::parseProperties(initPropString); + std::shared_ptr underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front(); + underlyingMdp.printModelInformationToStream(std::cout); + + std::unique_ptr underlyingRes( + storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); + STORM_LOG_ASSERT(underlyingRes, "Result not exist."); + underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); + auto mdpResultMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); + + auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false); + underApproxModel->printModelInformationToStream(std::cout); + std::unique_ptr underapproxRes( + storm::api::verifyWithSparseEngine(underApproxModel, storm::api::createTask(underlyingProperty, false))); + STORM_LOG_ASSERT(underapproxRes, "Result not exist."); + underapproxRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxModel->getNumberOfStates(), true))); + auto mdpUnderapproxResultMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); + STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) std::vector> beliefList; std::vector beliefIsTarget; @@ -70,6 +116,7 @@ namespace storm { // current ID -> action -> reward std::map> beliefActionRewards; + std::vector hintVector; uint64_t nextId = 0; storm::utility::Stopwatch expansionTimer(true); // Initial belief always has ID 0 @@ -107,6 +154,8 @@ namespace storm { initInserted = true; beliefGrid.push_back(initialBelief); beliefsToBeExpanded.push_back(0); + hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() + : storm::utility::zero()); } else { // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; @@ -116,6 +165,9 @@ namespace storm { beliefsToBeExpanded.push_back(nextId); ++nextId; + hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() + : storm::utility::zero()); + beliefStateMap[nextId] = mdpStateId; initTransitionInActionBelief[mdpStateId] = initLambdas[j]; ++nextId; @@ -131,9 +183,11 @@ namespace storm { mdpTransitions.push_back(initTransitionsInBelief); } - //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. 
+                std::map<uint64_t, ValueType> weightedSumOverMap;
+                std::map<uint64_t, ValueType> weightedSumUnderMap;
+
                 // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points
                 while (!beliefsToBeExpanded.empty()) {
                     uint64_t currId = beliefsToBeExpanded.front();
@@ -196,9 +250,22 @@ namespace storm {
                                 storm::pomdp::Belief<ValueType> gridBelief = {nextId, observation, subSimplex[j]};
                                 beliefList.push_back(gridBelief);
                                 beliefGrid.push_back(gridBelief);
-                                beliefIsTarget.push_back(
-                                        targetObservations.find(observation) != targetObservations.end());
+                                // compute overapproximate value using MDP result map
+                                auto tempWeightedSumOver = storm::utility::zero<ValueType>();
+                                auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
+                                for (uint64_t i = 0; i < subSimplex[j].size(); ++i) {
+                                    tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(mdpResultMap[i]);
+                                    tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(mdpUnderapproxResultMap[i]);
+                                }
+                                beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
+
+                                if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) {
+                                    hintVector.push_back(tempWeightedSumOver);
+                                } else {
+                                    // fall back to the under-approximation value so the hint vector stays aligned with the MDP states
+                                    hintVector.push_back(tempWeightedSumUnder);
+                                }
+
+                                beliefsToBeExpanded.push_back(nextId);
+                                weightedSumOverMap[nextId] = tempWeightedSumOver;
+                                weightedSumUnderMap[nextId] = tempWeightedSumUnder;
                                 beliefStateMap[nextId] = mdpStateId;
                                 transitionInActionBelief[mdpStateId] = iter->second * lambdas[j];
@@ -267,12 +334,15 @@ namespace storm {
                     }
                 }
                 overApproxMdp.printModelInformationToStream(std::cout);
+                /*
                 storm::utility::Stopwatch overApproxTimer(true);
                 auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves,
                                                                   beliefActionRewards, subSimplexCache, lambdaCache,
                                                                   result, chosenActions, gridResolution, min, computeRewards);
-                overApproxTimer.stop();
+                overApproxTimer.stop();*/
+                auto underApprox = storm::utility::zero<ValueType>();
+                auto overApprox = storm::utility::one<ValueType>();
                 /* storm::utility::Stopwatch underApproxTimer(true);
                 ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations,
                                                                          observationProbabilities, nextBelieves,
@@ -282,10 +352,10 @@ namespace storm {
                 STORM_PRINT("Time Overapproximation: " << overApproxTimer
                                                        << std::endl
                                                        << "Time Underapproximation: " << underApproxTimer
-                                                       << std::endl);*/
+                                                       << std::endl);
                 STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
-                //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
+                STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);*/
                 auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
                 auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
@@ -298,7 +368,13 @@ namespace storm {
                 std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
                 std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
-                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, true)));
+
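+                // pass the values pre-computed on the underlying MDP as a result hint, so value iteration starts from a good initial guess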
+                auto task = storm::api::createTask<ValueType>(property, true);
+                auto hint = storm::modelchecker::ExplicitModelCheckerHint<ValueType>();
+                hint.setResultHint(hintVector);
+                auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
+                task.setHint(hintPtr);
+                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
                 STORM_LOG_ASSERT(res, "Result does not exist.");
                 res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates()));
                 STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap().begin()->second) << std::endl);
@@ -561,7 +637,7 @@ namespace storm {
                                                   std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
                                                   std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                                                   std::map<uint64_t, ValueType> &result,
-                                                  std::map<uint64_t, std::vector<uint64_t>> chosenActions,
+                                                  std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
                                                   uint64_t gridResolution, uint64_t initialBeliefId, bool min,
                                                   bool computeRewards, bool generateMdp) {
                 std::set<uint64_t> visitedBelieves;
@@ -689,8 +765,7 @@ namespace storm {
             template<typename ValueType, typename RewardModelType>
             storm::storage::SparseMatrix<ValueType>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::buildTransitionMatrix(
-                    std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions) {
+            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> &transitions) {
                 uint_fast64_t currentRow = 0;
                 uint_fast64_t currentRowGroup = 0;
                 uint64_t nrColumns = transitions.size();
@@ -809,7 +884,7 @@ namespace storm {
             template<typename ValueType, typename RewardModelType>
             uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefIdInVector(
                     std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
-                    std::vector<ValueType> probabilities) {
+                    std::vector<ValueType> &probabilities) {
                 // TODO This one is quite slow
                 for (auto const &belief : grid) {
                     if (belief.observation == observation) {
@@ -929,7 +1004,7 @@ namespace storm {
             template<typename ValueType, typename RewardModelType>
             std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>>
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeSubSimplexAndLambdas(
-                    std::vector<ValueType> probabilities, uint64_t resolution) {
+                    std::vector<ValueType> &probabilities, uint64_t resolution) {
                 // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
                 // Variable names are based on the paper
                 uint64_t probSize = probabilities.size();
@@ -989,7 +1064,7 @@ namespace storm {
             std::map<uint32_t, ValueType>
             ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeObservationProbabilitiesAfterAction(
                     storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                    storm::pomdp::Belief<ValueType> belief,
+                    storm::pomdp::Belief<ValueType> &belief,
                     uint64_t actionIndex) {
                 std::map<uint32_t, ValueType> res;
                 // the id is not important here as we immediately discard the belief (very hacky, I don't like it either)
@@ -1009,10 +1084,8 @@ namespace storm {
             template<typename ValueType, typename RewardModelType>
             storm::pomdp::Belief<ValueType>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterAction(
-                    storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                    storm::pomdp::Belief<ValueType> belief,
-                    uint64_t actionIndex, uint64_t id) {
+            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                                                           storm::pomdp::Belief<ValueType> &belief, uint64_t actionIndex, uint64_t id) {
                 std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
                 uint32_t observation = 0;
                 for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
@@ -1030,9 +1103,8 @@ namespace storm {
             template<typename ValueType, typename RewardModelType>
             uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterActionAndObservation(
                     storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
-                    std::vector<bool> &beliefIsTarget, std::set<uint32_t> const &targetObservations, storm::pomdp::Belief<ValueType> belief, uint64_t actionIndex,
-                    uint32_t observation,
-                    uint64_t id) {
+                    std::vector<bool> &beliefIsTarget, std::set<uint32_t> const &targetObservations, storm::pomdp::Belief<ValueType> &belief, uint64_t actionIndex,
+                    uint32_t observation, uint64_t id) {
                 storm::utility::Stopwatch distrWatch(true);
                 std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates()); //, storm::utility::zero<ValueType>());
                 for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
@@ -1075,7 +1147,7 @@ namespace storm {
             template<typename ValueType, typename RewardModelType>
             ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                                                                                     uint64_t action, storm::pomdp::Belief<ValueType> belief) {
+                                                                                                     uint64_t action, storm::pomdp::Belief<ValueType> &belief) {
                 auto result = storm::utility::zero<ValueType>();
                 for (size_t i = 0; i < belief.probabilities.size(); ++i) {
                     result += belief.probabilities[i] * pomdp.getUniqueRewardModel().getTotalStateActionReward(i, action, pomdp.getTransitionMatrix());
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 6d8b50d43..aada759d2 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -141,7 +141,7 @@ namespace storm {
                                           std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
                                           std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                                           std::map<uint64_t, ValueType> &result,
-                                          std::map<uint64_t, std::vector<uint64_t>> chosenActions,
+                                          std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
                                           uint64_t gridResolution, uint64_t initialBeliefId, bool min,
                                           bool computeReward, bool generateMdp);
             /**
@@ -161,7 +161,7 @@ namespace storm {
              * @return
              */
            std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>>
-            computeSubSimplexAndLambdas(std::vector<ValueType> probabilities, uint64_t gridResolution);
+            computeSubSimplexAndLambdas(std::vector<ValueType> &probabilities, uint64_t gridResolution);
             /**
@@ -188,7 +188,7 @@ namespace storm {
              */
             std::map<uint32_t, ValueType> computeObservationProbabilitiesAfterAction(
                     storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                    storm::pomdp::Belief<ValueType> belief,
+                    storm::pomdp::Belief<ValueType> &belief,
                     uint64_t actionIndex);
             /**
@@ -201,13 +201,12 @@ namespace storm {
              * @param observation the observation after the action was performed
              * @return the resulting belief (observation and distribution)
              */
-            uint64_t
-            getBeliefAfterActionAndObservation(
+            uint64_t getBeliefAfterActionAndObservation(
                     storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                     std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
                     std::vector<bool> &beliefIsTarget,
                     std::set<uint32_t> const &targetObservations,
-                    storm::pomdp::Belief<ValueType> belief,
+                    storm::pomdp::Belief<ValueType> &belief,
                     uint64_t actionIndex, uint32_t observation, uint64_t id);
             /**
@@ -219,8 +218,8 @@ namespace storm {
              * @return
              */
             storm::pomdp::Belief<ValueType>
-            getBeliefAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                 storm::pomdp::Belief<ValueType> belief, uint64_t actionIndex, uint64_t id);
+            getBeliefAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, storm::pomdp::Belief<ValueType> &belief, uint64_t actionIndex,
+                                 uint64_t id);
             /**
              * Helper to get the id of a Belief stored in a given vector structure
@@ -230,15 +229,15 @@ namespace storm {
              * @return
              */
             uint64_t getBeliefIdInVector(std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
-                                         std::vector<ValueType> probabilities);
+                                         std::vector<ValueType> &probabilities);
             /**
              * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability)
              * @return sparseMatrix representing the transitions
              */
-            storm::storage::SparseMatrix<ValueType> buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions);
+            storm::storage::SparseMatrix<ValueType> buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> &transitions);
-            ValueType getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t action, storm::pomdp::Belief<ValueType> belief);
+            ValueType getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t action, storm::pomdp::Belief<ValueType> &belief);
             ValueType overApproximationValueIteration(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                       std::vector<storm::pomdp::Belief<ValueType>> &beliefList,