diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 8d43e3977..c377461d8 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -598,10 +598,8 @@ namespace storm { // Now onto the under-approximation storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards) : - computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); + ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp); underApproxTimer.stop(); STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl @@ -631,119 +629,16 @@ namespace storm { template ValueType - ApproximatePOMDPModelchecker::computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, - bool computeReward) { - std::set visitedBelieves; - std::deque believesToBeExpanded; - std::map beliefStateMap; - std::vector> transitions; - std::vector targetStates; - - uint64_t stateId = 0; - beliefStateMap[initialBeliefId] = stateId; - ++stateId; - - // Expand the believes - visitedBelieves.insert(initialBeliefId); - believesToBeExpanded.push_back(initialBeliefId); - while (!believesToBeExpanded.empty()) { - auto currentBeliefId = believesToBeExpanded.front(); - std::map transitionsInState; - STORM_LOG_DEBUG("Exploring Belief " << beliefList[currentBeliefId].observation << "||" - << beliefList[currentBeliefId].probabilities); - if (beliefIsTarget[currentBeliefId]) { - // add a self-loop to target states and save them - transitionsInState[beliefStateMap[currentBeliefId]] = storm::utility::one(); - targetStates.push_back(beliefStateMap[currentBeliefId]); - } else { - if (chosenActions.find(currentBeliefId) == chosenActions.end()) { - // If the current Belief is not part of the grid, we have not computed the action to choose yet - chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations, - observationProbabilities, - nextBelieves, result, gridResolution, - currentBeliefId, beliefList.size(), min); - } - for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].begin(); - iter != observationProbabilities[currentBeliefId][chosenActions[currentBeliefId][0]].end(); ++iter) { - uint32_t observation = iter->first; - uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId][0]][observation]; - if (visitedBelieves.insert(nextBeliefId).second) { - beliefStateMap[nextBeliefId] = stateId; - ++stateId; - believesToBeExpanded.push_back(nextBeliefId); - } - transitionsInState[beliefStateMap[nextBeliefId]] = iter->second; - } - } - transitions.push_back(transitionsInState); - believesToBeExpanded.pop_front(); - } - - storm::models::sparse::StateLabeling labeling(transitions.size()); - labeling.addLabel("init"); - labeling.addLabel("target"); - labeling.addLabelToState("init", 0); - for (auto targetState : targetStates) { - labeling.addLabelToState("target", targetState); - } - - storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); - if (computeReward) { - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - // Add the reward collected by taking the chosen Action in the belief - rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( - storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])), - currentBelief)); - } - } - - std::unordered_map rewardModels = {{"std", rewardModel}}; - - storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(transitions), labeling, rewardModels); - - storm::models::sparse::Dtmc underApproxDtmc(modelComponents); - auto model = std::make_shared>(underApproxDtmc); - model->printModelInformationToStream(std::cout); - - std::string propertyString; - if (computeReward) { - propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; - } else { - propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; - } - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties( - propertyVector).front(); - - std::unique_ptr res( - storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, - true))); - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - return res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; - } - - template - ValueType - ApproximatePOMDPModelchecker::computeUnderapproximationWithMDP(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, - bool computeRewards) { + ApproximatePOMDPModelchecker::computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + std::map> chosenActions, + uint64_t gridResolution, uint64_t initialBeliefId, bool min, + bool computeRewards, bool generateMdp) { std::set visitedBelieves; std::deque believesToBeExpanded; std::map beliefStateMap; @@ -768,40 +663,21 @@ namespace storm { targetStates.push_back(beliefStateMap[currentBeliefId]); actionTransitionStorage.push_back(transitionsInStateWithAction); } else { - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); if (chosenActions.find(currentBeliefId) == chosenActions.end()) { - // If the current Belief is not part of the grid, the next states have not been computed yet. - std::vector> observationProbabilitiesInAction; - std::vector> nextBelievesInAction; - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, beliefList[currentBeliefId], action); - std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - beliefList[currentBeliefId], - action, - observation, - beliefList.size()); - } - observationProbabilitiesInAction.push_back(actionObservationProbabilities); - nextBelievesInAction.push_back(actionObservationBelieves); - } - observationProbabilities.emplace(std::make_pair(currentBeliefId, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction)); + chosenActions[currentBeliefId] = generateMdp ? extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, + observationProbabilities, + nextBelieves, result, gridResolution, + currentBeliefId, beliefList.size(), min) : + extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations, + observationProbabilities, + nextBelieves, result, gridResolution, + currentBeliefId, beliefList.size(), min); } // Iterate over all actions and add the corresponding transitions - for (uint64_t action = 0; action < numChoices; ++action) { + for (auto const &action : chosenActions[currentBeliefId]) { std::map transitionsInStateWithAction; - for (auto iter = observationProbabilities[currentBeliefId][action].begin(); - iter != observationProbabilities[currentBeliefId][action].end(); ++iter) { + for (auto iter = observationProbabilities[currentBeliefId][action].begin(); iter != observationProbabilities[currentBeliefId][action].end(); ++iter) { uint32_t observation = iter->first; uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation]; if (visitedBelieves.insert(nextBeliefId).second) { @@ -826,28 +702,48 @@ namespace storm { labeling.addLabelToState("target", targetState); } - storm::storage::sparse::ModelComponents modelComponents( - buildTransitionMatrix(transitions), labeling); - - storm::models::sparse::Mdp underApproxMdp(modelComponents); + std::shared_ptr> model; + auto transitionMatrix = buildTransitionMatrix(transitions); + if (transitionMatrix.getRowCount() == transitionMatrix.getRowGroupCount()) { + transitionMatrix.makeRowGroupingTrivial(); + } + storm::storage::sparse::ModelComponents modelComponents(transitionMatrix, labeling); + if (transitionMatrix.hasTrivialRowGrouping()) { + + storm::models::sparse::Dtmc underApproxMc(modelComponents); + storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); + if (computeRewards) { + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + // Add the reward collected by taking the chosen Action in the belief + rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( + storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])), + currentBelief)); + } + } + underApproxMc.addRewardModel("std", rewardModel); + underApproxMc.restrictRewardModels(std::set({"std"})); - if (computeRewards) { - storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); + model = std::make_shared>(underApproxMc); + } else { + storm::models::sparse::Mdp underApproxMdp(modelComponents); + if (computeRewards) { + storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); + } } + underApproxMdp.addRewardModel("std", rewardModel); + underApproxMdp.restrictRewardModels(std::set({"std"})); } - underApproxMdp.addRewardModel("std", rewardModel); - underApproxMdp.restrictRewardModels(std::set({"std"})); + model = std::make_shared>(underApproxMdp); } - - auto model = std::make_shared>(underApproxMdp); model->printModelInformationToStream(std::cout); std::string propertyString; @@ -857,12 +753,9 @@ namespace storm { propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; } std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties( - propertyVector).front(); + std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - std::unique_ptr res( - storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, - true))); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); return res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; @@ -938,17 +831,10 @@ namespace storm { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( pomdp, currentBelief, action); std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - currentBelief, - action, - observation, - nextId); + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, currentBelief, + action, observation, nextId); nextId = beliefList.size(); } observationProbabilitiesInAction.push_back(actionObservationProbabilities); @@ -958,8 +844,7 @@ namespace storm { nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction)); // choose the action which results in the value computed by the over-approximation - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); + ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); std::vector chosenActionIndices; ValueType currentValue; diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index cdf19dcd5..2c289d1f5 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -134,25 +134,15 @@ namespace storm { * @param min * @return */ - ValueType computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward); - - ValueType computeUnderapproximationWithMDP(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map> chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards); + ValueType computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + std::map> chosenActions, + uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward, bool generateMdp); /** *