From a5fc9fc5b75a8f802d7cdd169f99741d73750396 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 24 Jan 2020 11:05:25 +0100 Subject: [PATCH] Added naive underapproximation by unrolling the belief support up to a given size --- .../ApproximatePOMDPModelchecker.cpp | 311 ++++-------------- .../ApproximatePOMDPModelchecker.h | 17 +- 2 files changed, 66 insertions(+), 262 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 4144bf91c..84c055d1f 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -88,9 +88,10 @@ namespace storm { std::vector observationResolutionVector(pomdp.getNrObservations(), gridResolution); auto overRes = storm::utility::one(); auto underRes = storm::utility::zero(); + uint64_t underApproxModelSize = 50; uint64_t refinementCounter = 1; std::unique_ptr> res = computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold, - overApproxMap, underApproxMap); + overApproxMap, underApproxMap, underApproxModelSize); // TODO the actual refinement return res; } @@ -102,10 +103,11 @@ namespace storm { std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, boost::optional> overApproximationMap, - boost::optional> underApproximationMap) { + boost::optional> underApproximationMap, + uint64_t maxUaModelSize) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) auto result = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, computeRewards, explorationThreshold, overApproximationMap, - underApproximationMap); + underApproximationMap, maxUaModelSize); return std::make_unique>(POMDPCheckResult{result->overApproxValue, result->underApproxValue}); } @@ -117,7 +119,8 @@ namespace storm { std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, boost::optional> overApproximationMap, - boost::optional> underApproximationMap) { + boost::optional> underApproximationMap, + uint64_t maxUaModelSize) { bool boundMapsSet = overApproximationMap && underApproximationMap; std::map overMap; std::map underMap; @@ -413,183 +416,8 @@ namespace storm { auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)]; STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) - - // Prototypical implementation of the underapproximation - WRONG - /* - // The map has the following form: (beliefId, action) --> stateId - uamap_type uaStateMap; - // Reserve states 0 and 1 as always sink/goal states - std::map>> uaTransitions = {{0,{{{0, storm::utility::one()}}}}, - {1,{{{1, storm::utility::one()}}}}}; - // Hint vector for the MDP modelchecker (initialize with constant sink/goal values) - std::vector uaTargetStates = {1}; - uint64_t uaStateId = 2; - - // for beliefs which are both in the actual belief support and the grid, we use the max value for the action to indicate the support belief - uaStateMap.insert(uamap_type::value_type(std::make_pair(initialBelief.id, std::numeric_limits::max()), uaStateId)); - ++uaStateId; - beliefsToBeExpanded.push_back(initialBelief.id); - while(!beliefsToBeExpanded.empty()){ - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - bool isTarget = beliefIsTarget[currId]; - - if(isTarget){ - // For target states we add a self-loop - uaTargetStates.push_back(uaStateMap.left.at(std::make_pair(currId, std::numeric_limits::max()))); - uaTransitions[uaStateMap.left.at(std::make_pair(currId, std::numeric_limits::max()))] = {{{uaStateMap.left.at(std::make_pair(currId, std::numeric_limits::max())), storm::utility::one()}}}; - } else { - uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front()); - //Triangulate the current belief to determine its approximation bases - std::vector> subSimplex; - std::vector lambdas; - if (cacheSubsimplices && subSimplexCache.count(currId) > 0) { - subSimplex = subSimplexCache[currId]; - lambdas = lambdaCache[currId]; - } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - beliefList[currId].probabilities, observationResolutionVector[beliefList[currId].observation]); - subSimplex = temp.first; - lambdas = temp.second; - if(cacheSubsimplices){ - subSimplexCache[currId] = subSimplex; - lambdaCache[currId] = lambdas; - } - } - std::deque> approxToExpand; - std::vector> approxActionTransitions(numChoices); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())){ - uint64_t approxId = getBeliefIdInVector(beliefGrid, beliefList[currId].observation, subSimplex[j]); - //STORM_PRINT("ApproxId " << approxId << std::endl) - if (approxId == uint64_t(-1)) { - // If the approximation base is not yet in the grid, we add it and it has to be expanded - storm::pomdp::Belief gridBelief = {nextId, beliefList[currId].observation, subSimplex[j]}; - beliefList.push_back(gridBelief); - beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back(targetObservations.find(beliefList[currId].observation) != targetObservations.end()); - for(uint64_t action=0; action < numChoices; ++action) { - approxToExpand.push_back(std::make_pair(nextId, action)); - uaStateMap.insert(uamap_type::value_type(std::make_pair(nextId, action), uaStateId)); - approxActionTransitions[action][uaStateId] = lambdas[j]; - ++uaStateId; - } - ++nextId; - } else if(uaStateMap.left.find(std::pair(approxId,0)) != uaStateMap.left.end()){ - // we can check only for (approxId,0) as that always exists if the grid state is mapped - for(uint64_t action=0; action < numChoices; ++action) { - approxActionTransitions[action][uaStateMap.left.at(std::make_pair(approxId,action))] = lambdas[j]; - } - } else { - for(uint64_t action=0; action < numChoices; ++action) { - approxToExpand.push_back(std::make_pair(approxId, action)); - uaStateMap.insert(uamap_type::value_type(std::make_pair(approxId, action), uaStateId)); - approxActionTransitions[action][uaStateId] = lambdas[j]; - ++uaStateId; - } - } - } - } - uaTransitions[uaStateMap.left.at(std::make_pair(currId,std::numeric_limits::max()))] = approxActionTransitions; - // Now expand all approximation bases - while(!approxToExpand.empty()){ - uint64_t approxId = approxToExpand.front().first; - uint64_t approxAction = approxToExpand.front().second; - approxToExpand.pop_front(); - - // Iterate over all actions and determine the successor states - std::map transitionsInAction; - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[approxId], approxAction); - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[approxId], approxAction, - observation, nextId); - nextId = beliefList.size(); - if(uaStateMap.left.find(std::make_pair(idNextBelief, std::numeric_limits::max())) == uaStateMap.left.end()){ - // add state to the mapping and set it t be expanded - uaStateMap.insert(uamap_type::value_type(std::make_pair(idNextBelief,std::numeric_limits::max()), uaStateId)); - ++uaStateId; - beliefsToBeExpanded.push_back(idNextBelief); - } - transitionsInAction[uaStateMap.left.at(std::make_pair(idNextBelief,std::numeric_limits::max()))] = iter->second; - } - uaTransitions[uaStateMap.left.at(std::make_pair(approxId,approxAction))] = {transitionsInAction}; - } - } - } - - - std::vector>> uaTransitionVector; - for(auto iter = uaTransitions.begin(); iter != uaTransitions.end(); ++iter){ - uaTransitionVector.push_back(iter->second); - } - STORM_PRINT(uaTransitions.size() << std::endl) - - storm::models::sparse::StateLabeling uaLabeling(uaTransitions.size()); - uaLabeling.addLabel("init"); - uaLabeling.addLabel("target"); - uaLabeling.addLabel("belief"); - uaLabeling.addLabel("grid"); - uaLabeling.addLabelToState("init", uaStateMap.left.at(std::make_pair(initialBelief.id,std::numeric_limits::max()))); - for (auto targetState : uaTargetStates) { - uaLabeling.addLabelToState("target", targetState); - } - for (auto &iter : uaStateMap.right) { - std::stringstream mapEntryStr; - mapEntryStr << std::to_string(iter.first); - mapEntryStr << " --> "; - mapEntryStr << "[{" + std::to_string(beliefList[iter.second.first].observation) << "} | " ; - for(uint64_t state = 0; state < beliefList[iter.second.first].probabilities.size(); ++state){ - if(beliefList[iter.second.first].probabilities[state] > storm::utility::zero()){ - mapEntryStr << std::to_string(state) << " : " << beliefList[iter.second.first].probabilities[state] << ", "; - } - } - mapEntryStr << "]" << std::endl; - STORM_PRINT(mapEntryStr.str()); - if(!uaLabeling.containsLabel(mapEntryStr.str())){ - uaLabeling.addLabel(mapEntryStr.str()); - } - if(iter.second.second == std::numeric_limits::max()){ - uaLabeling.addLabelToState("belief", iter.first); - } else { - uaLabeling.addLabelToState("grid", iter.first); - } - uaLabeling.addLabelToState(mapEntryStr.str(), iter.first); - } - - //STORM_PRINT(buildTransitionMatrix(uaTransitionVector)) - storm::storage::sparse::ModelComponents uaModelComponents(buildTransitionMatrix(uaTransitionVector), uaLabeling); - storm::models::sparse::Mdp underApproxMdp(uaModelComponents); - if (computeRewards) { - storm::models::sparse::StandardRewardModel uaMdpRewardModel(boost::none, std::vector(uaModelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : uaStateMap.left) { - auto currentBelief = beliefList[iter.first.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - uaMdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); - } - } - underApproxMdp.addRewardModel("std", uaMdpRewardModel); - underApproxMdp.restrictRewardModels(std::set({"std"})); - } - underApproxMdp.printModelInformationToStream(std::cout); - - auto uaModel = std::make_shared>(underApproxMdp); - auto uaModelPtr = std::static_pointer_cast>(uaModel); - storm::api::exportSparseModelAsDot(uaModelPtr, "ua_model.dot"); - auto uaTask = storm::api::createTask(property, false); - storm::utility::Stopwatch underApproxTimer(true); - std::unique_ptr uaRes(storm::api::verifyWithSparseEngine(uaModelPtr, uaTask)); - underApproxTimer.stop(); - STORM_LOG_ASSERT(uaRes, "Result not exist."); - uaRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true))); - auto underApproxResultMap = uaRes->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[uaStateMap.left.at(std::make_pair(initialBelief.id, std::numeric_limits::max()))]; - */ - auto underApprox = weightedSumUnderMap[initialBelief.id]; + //auto underApprox = weightedSumUnderMap[initialBelief.id]; + auto underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); @@ -837,10 +665,10 @@ namespace storm { // Now onto the under-approximation storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp); + /*ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp);*/ underApproxTimer.stop(); - + auto underApprox = storm::utility::zero(); STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl << "Time Overapproximation: " << overApproxTimer << std::endl @@ -874,64 +702,60 @@ namespace storm { std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> &chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, - bool computeRewards, bool generateMdp) { + uint64_t initialBeliefId, bool min, + bool computeRewards, uint64_t maxModelSize) { std::set visitedBelieves; std::deque believesToBeExpanded; std::map beliefStateMap; - std::vector>> transitions; - std::vector targetStates; + std::vector>> transitions = {{{{0, storm::utility::one()}}}, + {{{1, storm::utility::one()}}}}; + std::vector targetStates = {1}; - uint64_t stateId = 0; + uint64_t stateId = 2; beliefStateMap[initialBeliefId] = stateId; ++stateId; + uint64_t nextId = beliefList.size(); + uint64_t counter = 0; // Expand the believes visitedBelieves.insert(initialBeliefId); believesToBeExpanded.push_back(initialBeliefId); while (!believesToBeExpanded.empty()) { + //TODO think of other ways to stop exploration besides model size auto currentBeliefId = believesToBeExpanded.front(); - std::vector> actionTransitionStorage; + uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); // for targets, we only consider one action with one transition if (beliefIsTarget[currentBeliefId]) { - // add a self-loop to target states and save them - std::map transitionsInStateWithAction; - transitionsInStateWithAction[beliefStateMap[currentBeliefId]] = storm::utility::one(); + // add a self-loop to target states targetStates.push_back(beliefStateMap[currentBeliefId]); - actionTransitionStorage.push_back(transitionsInStateWithAction); + transitions.push_back({{{beliefStateMap[currentBeliefId], storm::utility::one()}}}); + } else if (counter > maxModelSize) { + transitions.push_back({{{0, storm::utility::one()}}}); } else { - if (chosenActions.find(currentBeliefId) == chosenActions.end()) { - chosenActions[currentBeliefId] = generateMdp ? extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, - observationProbabilities, - nextBelieves, result, gridResolution, - currentBeliefId, beliefList.size(), min) : - extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations, - observationProbabilities, - nextBelieves, result, gridResolution, - currentBeliefId, beliefList.size(), min); - } // Iterate over all actions and add the corresponding transitions - for (auto const &action : chosenActions[currentBeliefId]) { + std::vector> actionTransitionStorage; + //TODO add a way to extract the actions from the over-approx and use them here? + for (uint64_t action = 0; action < numChoices; ++action) { std::map transitionsInStateWithAction; - - for (auto iter = observationProbabilities[currentBeliefId][action].begin(); iter != observationProbabilities[currentBeliefId][action].end(); ++iter) { + std::map observationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currentBeliefId], action); + for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) { uint32_t observation = iter->first; - uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation]; + uint64_t nextBeliefId = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId], + action, + observation, nextId); + nextId = beliefList.size(); if (visitedBelieves.insert(nextBeliefId).second) { beliefStateMap[nextBeliefId] = stateId; ++stateId; believesToBeExpanded.push_back(nextBeliefId); + ++counter; } transitionsInStateWithAction[beliefStateMap[nextBeliefId]] = iter->second; } actionTransitionStorage.push_back(transitionsInStateWithAction); } + transitions.push_back(actionTransitionStorage); } - transitions.push_back(actionTransitionStorage); believesToBeExpanded.pop_front(); } @@ -949,44 +773,28 @@ namespace storm { transitionMatrix.makeRowGroupingTrivial(); } storm::storage::sparse::ModelComponents modelComponents(transitionMatrix, labeling); - if (transitionMatrix.hasTrivialRowGrouping()) { - - storm::models::sparse::Dtmc underApproxMc(modelComponents); - storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); - if (computeRewards) { - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - // Add the reward collected by taking the chosen Action in the belief - rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( - storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])), - currentBelief)); - } - } - underApproxMc.addRewardModel("std", rewardModel); - underApproxMc.restrictRewardModels(std::set({"std"})); - - model = std::make_shared>(underApproxMc); - } else { - storm::models::sparse::Mdp underApproxMdp(modelComponents); - if (computeRewards) { - storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); - } + storm::models::sparse::Mdp underApproxMdp(modelComponents); + if (computeRewards) { + storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); } - underApproxMdp.addRewardModel("std", rewardModel); - underApproxMdp.restrictRewardModels(std::set({"std"})); } - model = std::make_shared>(underApproxMdp); + underApproxMdp.addRewardModel("std", rewardModel); + underApproxMdp.restrictRewardModels(std::set({"std"})); } + model = std::make_shared>(underApproxMdp); + model->printModelInformationToStream(std::cout); + storm::api::exportSparseModelAsDot(model, "ua_model.dot"); + std::string propertyString; if (computeRewards) { propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; @@ -996,10 +804,13 @@ namespace storm { std::vector propertyVector = storm::api::parseProperties(propertyString); std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, false))); STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - return res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true))); + auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); + auto underApprox = underApproxResultMap[beliefStateMap[initialBeliefId]]; + + return underApprox; } @@ -1406,4 +1217,4 @@ namespace storm { #endif } } -} \ No newline at end of file +} diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index df3e72af9..867f9d673 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -80,7 +80,7 @@ namespace storm { computeFirstRefinementStep(storm::models::sparse::Pomdp const &pomdp, std::set const &targetObservations, bool min, std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, boost::optional> overApproximationMap = boost::none, - boost::optional> underApproximationMap = boost::none); + boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); /** * @@ -96,7 +96,7 @@ namespace storm { std::set const &targetObservations, bool min, std::vector &observationResolutionVector, bool computeRewards, double explorationThreshold, boost::optional> overApproximationMap = boost::none, - boost::optional> underApproximationMap = boost::none); + boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); /** * @@ -164,24 +164,17 @@ namespace storm { * @param beliefList * @param beliefIsTarget * @param targetObservations - * @param observationProbabilities - * @param nextBelieves - * @param result - * @param chosenActions - * @param gridResolution * @param initialBeliefId * @param min + * @param computeReward + * @param maxModelSize * @return */ ValueType computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, std::set const &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> &chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward, bool generateMdp); + uint64_t initialBeliefId, bool min, bool computeReward, uint64_t maxModelSize); /** *