diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index a32d7dcc0..37a29f9b1 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -11,6 +11,7 @@ #include "storm/modelchecker/results/ExplicitQualitativeCheckResult.h" #include "storm/modelchecker/results/ExplicitQuantitativeCheckResult.h" #include "storm/api/properties.h" +#include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" namespace storm { @@ -18,7 +19,10 @@ namespace storm { namespace modelchecker { template ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker() { - cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(0.00000000001), false); + precision = 0.000000001; + cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(precision), false); + useMdp = false; + maxIterations = 1000; } template @@ -44,22 +48,18 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + ApproximatePOMDPModelchecker::computeReachabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, uint64_t gridResolution, + bool computeRewards) { STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - - uint64_t maxIterations = 100; - bool finished = false; - uint64_t iteration = 0; std::vector> beliefList; std::vector beliefIsTarget; std::vector> beliefGrid; std::map result; - std::map result_backup; //Use caching to avoid multiple computation of the subsimplices and lambdas std::map>> subSimplexCache; std::map> lambdaCache; - std::map chosenActions; + std::map> chosenActions; std::deque beliefsToBeExpanded; @@ -67,6 +67,8 @@ namespace storm { std::map>> observationProbabilities; // current ID -> action -> next ID std::map>> nextBelieves; + // current ID -> action -> reward + std::map> beliefActionRewards; uint64_t nextId = 0; storm::utility::Stopwatch expansionTimer(true); @@ -74,18 +76,27 @@ namespace storm { storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); ++nextId; beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); + + // These are the components to build the MDP from the grid + std::map beliefStateMap; + std::vector>> mdpTransitions; + std::vector targetStates; + uint64_t mdpStateId = 0; + + beliefStateMap[initialBelief.id] = mdpStateId; + ++mdpStateId; // for the initial belief, add the triangulated initial states - std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); + std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); std::vector> initSubSimplex = initTemp.first; std::vector initLambdas = initTemp.second; subSimplexCache[0] = initSubSimplex; lambdaCache[0] = initLambdas; bool initInserted = false; + std::vector> initTransitionsInBelief; + std::map initTransitionInActionBelief; for (size_t j = 0; j < initLambdas.size(); ++j) { if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { @@ -101,268 +112,23 @@ namespace storm { storm::pomdp::Belief gridBelief = {nextId, 
initialBelief.observation, initSubSimplex[j]}; beliefList.push_back(gridBelief); beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); ++nextId; - } - } - } - } - - - //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting - - // Expand the beliefs to generate the grid on-the-fly to avoid unreachable grid points - while (!beliefsToBeExpanded.empty()) { - uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - bool isTarget = beliefIsTarget[currId]; - if (isTarget) { - result.emplace(std::make_pair(currId, storm::utility::one())); - result_backup.emplace(std::make_pair(currId, storm::utility::one())); - } else { - result.emplace(std::make_pair(currId, storm::utility::zero())); - result_backup.emplace(std::make_pair(currId, storm::utility::zero())); - - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(beliefList[currId].observation).front()); - std::vector> observationProbabilitiesInAction(numChoices); - std::vector> nextBelievesInAction(numChoices); - - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, beliefList[currId], action); - std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, - observation, nextId); - nextId = beliefList.size(); - actionObservationBelieves[observation] = idNextBelief; - //Triangulate here and put the possibly resulting belief in the grid - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(idNextBelief) > 0) { - // TODO is this necessary here? 
Think later - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - beliefList[idNextBelief].probabilities, gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; - } - - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - if (getBeliefIdInVector(beliefGrid, observation, subSimplex[j]) == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; - beliefList.push_back(gridBelief); - beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back( - targetObservations.find(observation) != targetObservations.end()); - beliefsToBeExpanded.push_back(nextId); - ++nextId; - } - } - } - } - observationProbabilitiesInAction[action] = actionObservationProbabilities; - nextBelievesInAction[action] = actionObservationBelieves; - } - observationProbabilities.emplace( - std::make_pair(currId, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); - } - } - expansionTimer.stop(); - STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl) - STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) - STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) - - storm::utility::Stopwatch overApproxTimer(true); - // Value Iteration - while (!finished && iteration < maxIterations) { - storm::utility::Stopwatch iterationTimer(true); - STORM_LOG_DEBUG("Iteration " << iteration + 1); - bool improvement = false; - for (size_t i = 0; i < beliefGrid.size(); ++i) { - storm::pomdp::Belief currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (!isTarget) { - // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); - // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); - ValueType currentValue; - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = storm::utility::zero(); - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - // cache the values to not always re-calculate - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(nextBelief.id) > 0) { - subSimplex = subSimplexCache[nextBelief.id]; - lambdas = lambdaCache[nextBelief.id]; - } else { - // TODO is this necessary here? 
Everything should have already been computed - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; - } - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); - } - } - currentValue += iter->second * sum; - } - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - chosenActionIndex = action; - } - } - result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndex; - // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { - improvement = true; + beliefStateMap[nextId] = mdpStateId; + initTransitionInActionBelief[mdpStateId] = initLambdas[j]; + ++nextId; + ++mdpStateId; } } } - finished = !improvement; - // back up - result_backup = result; - - ++iteration; - iterationTimer.stop(); - STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); } - STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - - auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; - } - } - overApproxTimer.stop(); - - storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, false); - underApproxTimer.stop(); - - STORM_PRINT("Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl); - - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); - - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { - STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl) - - RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); - - uint64_t maxIterations = 100; - bool finished = false; - uint64_t iteration = 0; - std::vector> beliefList; - std::vector beliefIsTarget; - std::vector> beliefGrid; - std::map result; - std::map result_backup; - //Use caching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; - std::map> lambdaCache; - std::map chosenActions; - - std::deque beliefsToBeExpanded; - - // Belief ID -> Observation -> Probability - std::map>> observationProbabilities; - // current ID -> action -> next ID - std::map>> nextBelieves; - // current ID -> action -> reward - std::map> 
beliefActionRewards; - - uint64_t nextId = 0; - storm::utility::Stopwatch expansionTimer(true); - // Initial belief always has ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); - ++nextId; - beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - - // for the initial belief, add the triangulated initial states - std::pair>, std::vector> initTemp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); - std::vector> initSubSimplex = initTemp.first; - std::vector initLambdas = initTemp.second; - subSimplexCache[0] = initSubSimplex; - lambdaCache[0] = initLambdas; - bool initInserted = false; - - - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { - uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]); - if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) { - if (searchResult == 0) { - // the initial belief is on the grid itself - initInserted = true; - beliefGrid.push_back(initialBelief); - beliefsToBeExpanded.push_back(0); - } else { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; - beliefList.push_back(gridBelief); - beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - beliefsToBeExpanded.push_back(nextId); - ++nextId; - } - } - } + // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs + if (!initTransitionInActionBelief.empty()) { + initTransitionsInBelief.push_back(initTransitionInActionBelief); + mdpTransitions.push_back(initTransitionsInBelief); } @@ -374,19 +140,31 @@ namespace storm { beliefsToBeExpanded.pop_front(); bool isTarget = beliefIsTarget[currId]; - result.emplace(std::make_pair(currId, storm::utility::zero())); - result_backup.emplace(std::make_pair(currId, storm::utility::zero())); - if (!isTarget) { + if (isTarget) { + // Depending on whether we compute rewards, we select the right initial result + result.emplace(std::make_pair(currId, computeRewards ? 
storm::utility::zero() : storm::utility::one())); + + // MDP stuff + std::vector> transitionsInBelief; + targetStates.push_back(beliefStateMap[currId]); + std::map transitionInActionBelief; + transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); + transitionsInBelief.push_back(transitionInActionBelief); + mdpTransitions.push_back(transitionsInBelief); + } else { + result.emplace(std::make_pair(currId, storm::utility::zero())); uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); std::vector> observationProbabilitiesInAction(numChoices); std::vector> nextBelievesInAction(numChoices); std::vector actionRewardsInState(numChoices); + std::vector> transitionsInBelief; for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action); std::map actionObservationBelieves; + std::map transitionInActionBelief; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW @@ -421,21 +199,40 @@ namespace storm { beliefIsTarget.push_back( targetObservations.find(observation) != targetObservations.end()); beliefsToBeExpanded.push_back(nextId); + + beliefStateMap[nextId] = mdpStateId; + transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; ++nextId; + ++mdpStateId; + } else { + transitionInActionBelief[beliefStateMap[getBeliefIdInVector(beliefGrid, observation, subSimplex[j])]] = iter->second * lambdas[j]; } } } } observationProbabilitiesInAction[action] = actionObservationProbabilities; nextBelievesInAction[action] = actionObservationBelieves; - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - beliefList[currId]); + if (computeRewards) { + actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + beliefList[currId]); + } + if (!transitionInActionBelief.empty()) { + transitionsInBelief.push_back(transitionInActionBelief); + } } - observationProbabilities.emplace( - std::make_pair(currId, observationProbabilitiesInAction)); + observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction)); - beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); + if (computeRewards) { + beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState)); + } + + if (transitionsInBelief.empty()) { + std::map transitionInActionBelief; + transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one(); + transitionsInBelief.push_back(transitionInActionBelief); + } + mdpTransitions.push_back(transitionsInBelief); } } expansionTimer.stop(); @@ -443,207 +240,89 @@ namespace storm { STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl) STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl) - storm::utility::Stopwatch overApproxTimer(true); - // Value Iteration - while (!finished && iteration < maxIterations) { - storm::utility::Stopwatch iterationTimer(true); - STORM_LOG_DEBUG("Iteration " << iteration + 1); - bool improvement = false; - for (size_t i = 0; i < beliefGrid.size(); ++i) { - storm::pomdp::Belief currentBelief 
= beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (!isTarget) { - // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); - // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); - ValueType currentValue; - - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = beliefActionRewards[currentBelief.id][action]; - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - // cache the values to not always re-calculate - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(nextBelief.id) > 0) { - subSimplex = subSimplexCache[nextBelief.id]; - lambdas = lambdaCache[nextBelief.id]; - } else { - //TODO This should not ne reachable - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; - } - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); - } - } - - currentValue += iter->second * sum; - } - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - - chosenValue = currentValue; - chosenActionIndex = action; - } - } - - result[currentBelief.id] = chosenValue; + storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); + mdpLabeling.addLabel("init"); + mdpLabeling.addLabel("target"); + mdpLabeling.addLabelToState("init", 0); + for (auto targetState : targetStates) { + mdpLabeling.addLabelToState("target", targetState); + } - chosenActions[currentBelief.id] = chosenActionIndex; - // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || - cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { - improvement = true; - } + storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); + storm::models::sparse::Mdp overApproxMdp(modelComponents); + if (computeRewards) { + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + 
mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); } } - finished = !improvement; - // back up - result_backup = result; - - ++iteration; - iterationTimer.stop(); - STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); + overApproxMdp.addRewardModel("std", mdpRewardModel); + overApproxMdp.restrictRewardModels(std::set({"std"})); } + overApproxMdp.printModelInformationToStream(std::cout); - STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - - auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; - } - } + storm::utility::Stopwatch overApproxTimer(true); + auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, + subSimplexCache, lambdaCache, + result, chosenActions, gridResolution, min, computeRewards); overApproxTimer.stop(); - + auto underApprox = storm::utility::zero(); + /* storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, true); + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); underApproxTimer.stop(); STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl << "Time Underapproximation: " << underApproxTimer - << std::endl); - - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); - - } - - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { - storm::utility::Stopwatch beliefGridTimer(true); - uint64_t maxIterations = 100; - bool finished = false; - uint64_t iteration = 0; - - std::vector> beliefList; - std::vector beliefIsTarget; - uint64_t nextId = 0; - // Initial belief always has ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); - ++nextId; - beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - - - std::vector> beliefGrid; - constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, - nextId); - nextId = beliefList.size(); - beliefGridTimer.stop(); - - storm::utility::Stopwatch overApproxTimer(true); - // Belief ID -> Value - std::map result; - std::map result_backup; - // Belief ID -> ActionIndex - std::map chosenActions; + << std::endl);*/ - // Belief ID -> Observation -> Probability - std::map>> observationProbabilities; - // current ID -> action -> next ID - std::map>> nextBelieves; + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - storm::utility::Stopwatch 
nextBeliefGeneration(true);
-            for (size_t i = 0; i < beliefGrid.size(); ++i) {
-                auto currentBelief = beliefGrid[i];
-                bool isTarget = beliefIsTarget[currentBelief.id];
-                if (isTarget) {
-                    result.emplace(std::make_pair(currentBelief.id, storm::utility::one()));
-                    result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::one()));
-                } else {
-                    result.emplace(std::make_pair(currentBelief.id, storm::utility::zero()));
-                    result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero()));
+            auto model = std::make_shared>(overApproxMdp);
+            auto modelPtr = std::static_pointer_cast>(model);
+            std::vector parameterNames;
+            storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames);
-                    //TODO put this in extra function
-                    uint64_t numChoices = pomdp.getNumberOfChoices(
-                            pomdp.getStatesWithObservation(currentBelief.observation).front());
-                    std::vector> observationProbabilitiesInAction(numChoices);
-                    std::vector> nextBelievesInAction(numChoices);
+            std::string propertyString = computeRewards ? "R" : "P";
+            propertyString += min ? "min" : "max";
+            propertyString += "=? [F \"target\"]";
+            std::vector propertyVector = storm::api::parseProperties(propertyString);
+            std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front();
-                    for (uint64_t action = 0; action < numChoices; ++action) {
-                        std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
-                                pomdp, currentBelief, action);
-                        std::map actionObservationBelieves;
-                        for (auto iter = actionObservationProbabilities.begin();
-                             iter != actionObservationProbabilities.end(); ++iter) {
-                            uint32_t observation = iter->first;
-                            // THIS CALL IS SLOW
-                            // TODO speed this up
-                            actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp,
-                                                                                                        beliefList,
-                                                                                                        beliefIsTarget,
-                                                                                                        targetObservations,
-                                                                                                        currentBelief,
-                                                                                                        action,
-                                                                                                        observation,
-                                                                                                        nextId);
-                            nextId = beliefList.size();
-                        }
-                        observationProbabilitiesInAction[action] = actionObservationProbabilities;
-                        nextBelievesInAction[action] = actionObservationBelieves;
-                    }
-                    observationProbabilities.emplace(
-                            std::make_pair(currentBelief.id, observationProbabilitiesInAction));
-                    nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction));
-                }
-            }
-            nextBeliefGeneration.stop();
+            std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true)));
+            STORM_LOG_ASSERT(res, "Result does not exist.");
+            res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates()));
+            STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second) << std::endl);
-            //Use chaching to avoid multiple computation of the subsimplices and lambdas
-            std::map>> subSimplexCache;
-            std::map> lambdaCache;
+            return std::make_unique>(POMDPCheckResult{overApprox, underApprox});
+        }
-            STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl)
+        template
+        ValueType
+        ApproximatePOMDPModelchecker::overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp,
+                                                                      std::vector> &beliefList,
+                                                                      std::vector> &beliefGrid,
+                                                                      std::vector &beliefIsTarget,
+                                                                      std::map>> &observationProbabilities,
+                                                                      std::map>> &nextBelieves,
+                                                                      std::map> &beliefActionRewards,
+                                                                      std::map>> &subSimplexCache,
+                                                                      std::map> &lambdaCache,
+                                                                      std::map &result,
+                                                                      std::map> &chosenActions,
+                                                                      uint64_t gridResolution, bool min, bool computeRewards) {
+            std::map result_backup = result;
+            uint64_t iteration = 0;
+            bool finished =
false; // Value Iteration while (!finished && iteration < maxIterations) { storm::utility::Stopwatch iterationTimer(true); @@ -654,16 +333,14 @@ namespace storm { bool isTarget = beliefIsTarget[currentBelief.id]; if (!isTarget) { // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); + uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front()); // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); + std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = storm::utility::zero(); + currentValue = computeRewards ? beliefActionRewards[currentBelief.id][action] : storm::utility::zero(); for (auto iter = observationProbabilities[currentBelief.id][action].begin(); iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { uint32_t observation = iter->first; @@ -676,8 +353,8 @@ namespace storm { subSimplex = subSimplexCache[nextBelief.id]; lambdas = lambdaCache[nextBelief.id]; } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(nextBelief.probabilities, + gridResolution); subSimplex = temp.first; lambdas = temp.second; subSimplexCache[nextBelief.id] = subSimplex; @@ -686,25 +363,29 @@ namespace storm { auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); } } + currentValue += iter->second * sum; } // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; - chosenActionIndex = action; + if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { + chosenActionIndices.clear(); + } + chosenActionIndices.push_back(action); } } + result[currentBelief.id] = chosenValue; - chosenActions[currentBelief.id] = chosenActionIndex; + + chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { + if (!cc.isEqual(result_backup[currentBelief.id], result[currentBelief.id])) { improvement = true; } } @@ -720,58 +401,38 @@ namespace storm { STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - beliefGrid.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); - std::vector> initSubSimplex = 
temp.first; - std::vector initLambdas = temp.second; auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; + for (size_t j = 0; j < lambdaCache[0].size(); ++j) { + if (lambdaCache[0][j] != storm::utility::zero()) { + overApprox += lambdaCache[0][j] * result_backup[getBeliefIdInVector(beliefGrid, beliefList[0].observation, subSimplexCache[0][j])]; } } - overApproxTimer.stop(); - - // Now onto the under-approximation - bool useMdp = true; - storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, false) : - computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, - chosenActions, gridResolution, initialBelief.id, min, false); - underApproxTimer.stop(); - - STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl - << "Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl); + return overApprox; + } - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, + uint64_t gridResolution) { + return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, true); + } - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, + uint64_t gridResolution) { + return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, false); } - //TODO This function reuses a lot of code from the probability computation, refactor to minimize code duplication! 
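
The value iteration above leans on computeSubSimplexAndLambdas, which triangulates an arbitrary belief into vertices of the resolution-bounded grid following the Freudenthal-triangulation scheme from the Lovejoy paper cited in the comments. Below is a minimal standalone sketch of that idea, with plain double beliefs and illustrative names; it is not Storm's implementation, only the construction whose results the subSimplexCache/lambdaCache maps store per belief id.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <numeric>
#include <vector>

struct Triangulation {
    std::vector<std::vector<double>> vertices; // grid beliefs spanning the enclosing sub-simplex
    std::vector<double> lambdas;               // convex weights in the same order, summing to 1
};

// Express `belief` as a convex combination of lattice beliefs whose entries
// are multiples of 1/resolution. Assumes a non-empty probability vector.
Triangulation triangulate(std::vector<double> const& belief, uint64_t resolution) {
    std::size_t const n = belief.size();
    if (n == 0) {
        return {};
    }
    double const res = static_cast<double>(resolution);
    // Cumulative-sum coordinates x_i = res * sum_{k >= i} belief_k; note x_0 == res.
    std::vector<double> x(n), v(n), d(n);
    double tail = 0.0;
    for (std::size_t i = n; i-- > 0;) {
        tail += belief[i];
        x[i] = res * tail;
        v[i] = std::floor(x[i]); // lattice corner below x
        d[i] = x[i] - v[i];      // fractional parts
    }
    // Visit coordinates by decreasing fractional part.
    std::vector<std::size_t> p(n);
    std::iota(p.begin(), p.end(), 0);
    std::sort(p.begin(), p.end(), [&](std::size_t a, std::size_t b) { return d[a] > d[b]; });
    // Map a lattice point in cumulative coordinates back to a belief vector.
    auto toBelief = [&](std::vector<double> const& corner) {
        std::vector<double> b(n);
        for (std::size_t i = 0; i < n; ++i) {
            b[i] = (corner[i] - (i + 1 < n ? corner[i + 1] : 0.0)) / res;
        }
        return b;
    };
    Triangulation t;
    double lambda = 1.0 - d[p[0]];
    if (lambda > 1e-12) {
        t.vertices.push_back(toBelief(v));
        t.lambdas.push_back(lambda);
    }
    for (std::size_t k = 0; k < n; ++k) {
        lambda = d[p[k]] - (k + 1 < n ? d[p[k + 1]] : 0.0);
        v[p[k]] += 1.0; // walk one unit step to the next vertex of the simplex
        if (lambda > 1e-12) {
            t.vertices.push_back(toBelief(v));
            t.lambdas.push_back(lambda);
        }
    }
    return t;
}

A belief that already lies on the grid comes back as itself with weight one, which is why the expansion code only follows sub-simplex vertices whose lambda is non-zero.
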
template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + ApproximatePOMDPModelchecker::computeReachability(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, uint64_t gridResolution, + bool computeRewards) { storm::utility::Stopwatch beliefGridTimer(true); - uint64_t maxIterations = 100; - bool finished = false; - uint64_t iteration = 0; - - RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); std::vector> beliefList; std::vector beliefIsTarget; @@ -780,22 +441,18 @@ namespace storm { storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); ++nextId; beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); std::vector> beliefGrid; - constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, - nextId); + constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId); nextId = beliefList.size(); beliefGridTimer.stop(); storm::utility::Stopwatch overApproxTimer(true); // Belief ID -> Value std::map result; - std::map result_backup; // Belief ID -> ActionIndex - std::map chosenActions; + std::map> chosenActions; // Belief ID -> Observation -> Probability std::map>> observationProbabilities; @@ -803,14 +460,22 @@ namespace storm { std::map>> nextBelieves; // current ID -> action -> reward std::map> beliefActionRewards; + //Use caching to avoid multiple computation of the subsimplices and lambdas + std::map>> subSimplexCache; + std::map> lambdaCache; + + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); + subSimplexCache[0] = temp.first; + lambdaCache[0] = temp.second; storm::utility::Stopwatch nextBeliefGeneration(true); for (size_t i = 0; i < beliefGrid.size(); ++i) { auto currentBelief = beliefGrid[i]; bool isTarget = beliefIsTarget[currentBelief.id]; - result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - if (!isTarget) { + if (isTarget) { + result.emplace(std::make_pair(currentBelief.id, computeRewards ? 
storm::utility::zero() : storm::utility::one())); + } else { + result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); //TODO put this in extra function // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); @@ -821,151 +486,44 @@ namespace storm { std::vector actionRewardsInState(numChoices); for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, currentBelief, action); + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action); std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - currentBelief, - action, - observation, - nextId); + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, currentBelief, + action, observation, nextId); nextId = beliefList.size(); } observationProbabilitiesInAction[action] = actionObservationProbabilities; nextBelievesInAction[action] = actionObservationBelieves; - - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief); + if (computeRewards) { + actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief); + } } - observationProbabilities.emplace( - std::make_pair(currentBelief.id, observationProbabilitiesInAction)); + observationProbabilities.emplace(std::make_pair(currentBelief.id, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); - beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState)); + if (computeRewards) { + beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState)); + } } } nextBeliefGeneration.stop(); - //Use chaching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; - std::map> lambdaCache; - STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) // Value Iteration - while (!finished && iteration < maxIterations) { - storm::utility::Stopwatch iterationTimer(true); - STORM_LOG_DEBUG("Iteration " << iteration + 1); - bool improvement = false; - for (size_t i = 0; i < beliefGrid.size(); ++i) { - storm::pomdp::Belief currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (!isTarget) { - // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); - // Initialize the values for the value iteration - ValueType chosenValue = min ? 
storm::utility::infinity() - : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); - ValueType currentValue; - - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = beliefActionRewards[currentBelief.id][action]; - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - // cache the values to not always re-calculate - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(nextBelief.id) > 0) { - subSimplex = subSimplexCache[nextBelief.id]; - lambdas = lambdaCache[nextBelief.id]; - } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; - } - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); - } - } - - currentValue += iter->second * sum; - } - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - - chosenValue = currentValue; - chosenActionIndex = action; - } - } - - result[currentBelief.id] = chosenValue; - - chosenActions[currentBelief.id] = chosenActionIndex; - // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || - cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { - improvement = true; - } - } - } - finished = !improvement; - // back up - result_backup = result; - - ++iteration; - iterationTimer.stop(); - STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); - } - - STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - - beliefGrid.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); - std::vector> initSubSimplex = temp.first; - std::vector initLambdas = temp.second; - - auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; - } - } + auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, + subSimplexCache, lambdaCache, + result, chosenActions, gridResolution, min, computeRewards); overApproxTimer.stop(); // Now onto the under-approximation - bool useMdp = true; storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = useMdp ? 
computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, true) : - computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, true); + ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp); underApproxTimer.stop(); STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl @@ -976,125 +534,36 @@ namespace storm { STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); + return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); } template - ValueType - ApproximatePOMDPModelchecker::computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, - bool computeReward) { - std::set visitedBelieves; - std::deque believesToBeExpanded; - std::map beliefStateMap; - std::vector> transitions; - std::vector targetStates; - - uint64_t stateId = 0; - beliefStateMap[initialBeliefId] = stateId; - ++stateId; - - // Expand the believes - visitedBelieves.insert(initialBeliefId); - believesToBeExpanded.push_back(initialBeliefId); - while (!believesToBeExpanded.empty()) { - auto currentBeliefId = believesToBeExpanded.front(); - std::map transitionsInState; - STORM_LOG_DEBUG("Exploring Belief " << beliefList[currentBeliefId].observation << "||" - << beliefList[currentBeliefId].probabilities); - if (beliefIsTarget[currentBeliefId]) { - // add a self-loop to target states and save them - transitionsInState[beliefStateMap[currentBeliefId]] = storm::utility::one(); - targetStates.push_back(beliefStateMap[currentBeliefId]); - } else { - if (chosenActions.find(currentBeliefId) == chosenActions.end()) { - // If the current Belief is not part of the grid, we have not computed the action to choose yet - chosenActions[currentBeliefId] = extractBestAction(pomdp, beliefList, beliefIsTarget, - targetObservations, - observationProbabilities, - nextBelieves, result, gridResolution, - currentBeliefId, beliefList.size(), min); - } - for (auto iter = observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].begin(); - iter != - observationProbabilities[currentBeliefId][chosenActions[currentBeliefId]].end(); ++iter) { - uint32_t observation = iter->first; - uint64_t nextBeliefId = nextBelieves[currentBeliefId][chosenActions[currentBeliefId]][observation]; - if (visitedBelieves.insert(nextBeliefId).second) { - beliefStateMap[nextBeliefId] = stateId; - ++stateId; - believesToBeExpanded.push_back(nextBeliefId); - } - transitionsInState[beliefStateMap[nextBeliefId]] = iter->second; - } - } - transitions.push_back(transitionsInState); - believesToBeExpanded.pop_front(); - } - - storm::models::sparse::StateLabeling labeling(transitions.size()); - labeling.addLabel("init"); - labeling.addLabel("target"); - 
labeling.addLabelToState("init", 0); - for (auto targetState : targetStates) { - labeling.addLabelToState("target", targetState); - } - - storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - // Add the reward collected by taking the chosen Action in the belief - rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( - storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first])), - currentBelief)); - } - - std::unordered_map rewardModels = {{"std", rewardModel}}; - - storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(transitions), labeling, rewardModels); - - storm::models::sparse::Dtmc underApproxDtmc(modelComponents); - auto model = std::make_shared>(underApproxDtmc); - model->printModelInformationToStream(std::cout); - - std::string propertyString; - if (computeReward) { - propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]"; - } else { - propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]"; - } - std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties( - propertyVector).front(); + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, + uint64_t gridResolution) { + return computeReachability(pomdp, targetObservations, min, gridResolution, false); + } - std::unique_ptr res( - storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, - true))); - STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); - return res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, + std::set const &targetObservations, bool min, uint64_t gridResolution) { + return computeReachability(pomdp, targetObservations, min, gridResolution, true); } template ValueType - ApproximatePOMDPModelchecker::computeUnderapproximationWithMDP(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set &targetObservations, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map &result, - std::map chosenActions, - uint64_t gridResolution, uint64_t initialBeliefId, bool min, - bool computeRewards) { + ApproximatePOMDPModelchecker::computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set const &targetObservations, + std::map>> &observationProbabilities, + std::map>> &nextBelieves, + std::map &result, + std::map> chosenActions, + uint64_t gridResolution, uint64_t initialBeliefId, bool min, + bool computeRewards, bool generateMdp) { std::set visitedBelieves; std::deque believesToBeExpanded; std::map beliefStateMap; @@ -1119,41 +588,21 @@ namespace storm { targetStates.push_back(beliefStateMap[currentBeliefId]); actionTransitionStorage.push_back(transitionsInStateWithAction); } else { - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front()); if 
(chosenActions.find(currentBeliefId) == chosenActions.end()) { - // If the current Belief is not part of the grid, the next states have not been computed yet. - std::vector> observationProbabilitiesInAction; - std::vector> nextBelievesInAction; - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, beliefList[currentBeliefId], action); - std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - beliefList[currentBeliefId], - action, - observation, - beliefList.size()); - } - observationProbabilitiesInAction.push_back(actionObservationProbabilities); - nextBelievesInAction.push_back(actionObservationBelieves); - } - observationProbabilities.emplace(std::make_pair(currentBeliefId, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction)); + chosenActions[currentBeliefId] = generateMdp ? extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, + observationProbabilities, + nextBelieves, result, gridResolution, + currentBeliefId, beliefList.size(), min) : + extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations, + observationProbabilities, + nextBelieves, result, gridResolution, + currentBeliefId, beliefList.size(), min); } // Iterate over all actions and add the corresponding transitions - for (uint64_t action = 0; action < numChoices; ++action) { + for (auto const &action : chosenActions[currentBeliefId]) { std::map transitionsInStateWithAction; - for (auto iter = observationProbabilities[currentBeliefId][action].begin(); - iter != - observationProbabilities[currentBeliefId][action].end(); ++iter) { + for (auto iter = observationProbabilities[currentBeliefId][action].begin(); iter != observationProbabilities[currentBeliefId][action].end(); ++iter) { uint32_t observation = iter->first; uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation]; if (visitedBelieves.insert(nextBeliefId).second) { @@ -1178,28 +627,48 @@ namespace storm { labeling.addLabelToState("target", targetState); } - storm::storage::sparse::ModelComponents modelComponents( - buildTransitionMatrix(transitions), labeling); - - storm::models::sparse::Mdp underApproxMdp(modelComponents); + std::shared_ptr> model; + auto transitionMatrix = buildTransitionMatrix(transitions); + if (transitionMatrix.getRowCount() == transitionMatrix.getRowGroupCount()) { + transitionMatrix.makeRowGroupingTrivial(); + } + storm::storage::sparse::ModelComponents modelComponents(transitionMatrix, labeling); + if (transitionMatrix.hasTrivialRowGrouping()) { + + storm::models::sparse::Dtmc underApproxMc(modelComponents); + storm::models::sparse::StandardRewardModel rewardModel(std::vector(beliefStateMap.size())); + if (computeRewards) { + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + // Add the reward collected by taking the chosen Action in the belief + rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex( + storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])), + currentBelief)); + } + } + 
underApproxMc.addRewardModel("std", rewardModel); + underApproxMc.restrictRewardModels(std::set({"std"})); - if (computeRewards) { - storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { - // Add the reward - rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); + model = std::make_shared>(underApproxMc); + } else { + storm::models::sparse::Mdp underApproxMdp(modelComponents); + if (computeRewards) { + storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + for (auto const &iter : beliefStateMap) { + auto currentBelief = beliefList[iter.first]; + auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { + // Add the reward + rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), + getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief)); + } } + underApproxMdp.addRewardModel("std", rewardModel); + underApproxMdp.restrictRewardModels(std::set({"std"})); } - underApproxMdp.addRewardModel("std", rewardModel); - underApproxMdp.restrictRewardModels(std::set({"std"})); + model = std::make_shared>(underApproxMdp); } - - auto model = std::make_shared>(underApproxMdp); model->printModelInformationToStream(std::cout); std::string propertyString; @@ -1209,12 +678,9 @@ namespace storm { propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? 
[F \"target\"]"; } std::vector propertyVector = storm::api::parseProperties(propertyString); - std::shared_ptr property = storm::api::extractFormulasFromProperties( - propertyVector).front(); + std::shared_ptr property = storm::api::extractFormulasFromProperties(propertyVector).front(); - std::unique_ptr res( - storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, - true))); + std::unique_ptr res(storm::api::verifyWithSparseEngine(model, storm::api::createTask(property, true))); STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates())); return res->asExplicitQuantitativeCheckResult().getValueMap().begin()->second; @@ -1251,30 +717,11 @@ namespace storm { } template - storm::storage::SparseMatrix - ApproximatePOMDPModelchecker::buildTransitionMatrix( - std::vector> transitions) { - uint_fast64_t currentRow = 0; - uint64_t nrEntries = 0; - for (auto const &map : transitions) { - nrEntries += map.size(); - } - storm::storage::SparseMatrixBuilder smb(transitions.size(), transitions.size(), nrEntries); - for (auto const &map : transitions) { - for (auto const &transition : map) { - smb.addNextValue(currentRow, transition.first, transition.second); - } - ++currentRow; - } - return smb.build(); - } - - template - uint64_t ApproximatePOMDPModelchecker::extractBestAction( + std::vector ApproximatePOMDPModelchecker::extractBestActions( storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, - std::set &targetObservations, + std::set const &targetObservations, std::map>> &observationProbabilities, std::map>> &nextBelieves, std::map &result, @@ -1290,17 +737,10 @@ namespace storm { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( pomdp, currentBelief, action); std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { + for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - currentBelief, - action, - observation, - nextId); + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, currentBelief, + action, observation, nextId); nextId = beliefList.size(); } observationProbabilitiesInAction.push_back(actionObservationProbabilities); @@ -1310,9 +750,8 @@ namespace storm { nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction)); // choose the action which results in the value computed by the over-approximation - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); - uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType chosenValue = min ? 
@@ -1290,17 +737,10 @@ namespace storm {
                 std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
                         pomdp, currentBelief, action);
                 std::map<uint32_t, uint64_t> actionObservationBelieves;
-                for (auto iter = actionObservationProbabilities.begin();
-                     iter != actionObservationProbabilities.end(); ++iter) {
+                for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
                     uint32_t observation = iter->first;
-                    actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp,
-                                                                                                beliefList,
-                                                                                                beliefIsTarget,
-                                                                                                targetObservations,
-                                                                                                currentBelief,
-                                                                                                action,
-                                                                                                observation,
-                                                                                                nextId);
+                    actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, currentBelief,
+                                                                                                action, observation, nextId);
                     nextId = beliefList.size();
                 }
                 observationProbabilitiesInAction.push_back(actionObservationProbabilities);
@@ -1310,9 +750,8 @@ namespace storm {
             nextBelieves.emplace(std::make_pair(currentBeliefId, nextBelievesInAction));

             // choose the action which results in the value computed by the over-approximation
-            ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
-                                        : -storm::utility::infinity<ValueType>();
-            uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::infinity();
+            ValueType chosenValue = min ? storm::utility::infinity<ValueType>() : -storm::utility::infinity<ValueType>();
+            std::vector<uint64_t> chosenActionIndices;
             ValueType currentValue;

             for (uint64_t action = 0; action < numChoices; ++action) {
@@ -1330,8 +769,7 @@ namespace storm {
                         auto sum = storm::utility::zero<ValueType>();
                         for (size_t j = 0; j < lambdas.size(); ++j) {
                             if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
-                                sum += lambdas[j] * result.at(
-                                        getBeliefIdInVector(beliefList, observation, subSimplex[j]));
+                                sum += lambdas[j] * result.at(getBeliefIdInVector(beliefList, observation, subSimplex[j]));
                             }
                         }
                         currentValue += iter->second * sum;
@@ -1343,10 +781,28 @@ namespace storm {
                      cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
                     cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
+                    // A strictly better value invalidates all previously collected actions;
+                    // this has to be checked before chosenValue is overwritten.
+                    if (!cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
+                        chosenActionIndices.clear();
+                    }
                     chosenValue = currentValue;
-                    chosenActionIndex = action;
+                    chosenActionIndices.push_back(action);
                 }
             }
-            return chosenActionIndex;
+            return chosenActionIndices;
+        }
+
+        template<typename ValueType, typename RewardModelType>
+        std::vector<uint64_t> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::extractBestAction(
+                storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                std::vector<bool> &beliefIsTarget,
+                std::set<uint32_t> const &targetObservations,
+                std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+                std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+                std::map<uint64_t, ValueType> &result,
+                uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId, bool min) {
+            return std::vector<uint64_t>{
+                    extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, gridResolution, currentBeliefId,
+                                       nextId, min).front()};
         }
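extractBestActions now returns all optimal actions instead of a single index. The underlying pattern is a tie-aware argmax: a strictly better value clears the candidate set, an equal value extends it. In isolation, and with exact comparison in place of the ConstantsComparator used above:

#include <cstdint>
#include <limits>
#include <vector>

std::vector<uint64_t> collectMaximizers(std::vector<double> const &values) {
    std::vector<uint64_t> best;
    double bestValue = -std::numeric_limits<double>::infinity();
    for (uint64_t i = 0; i < values.size(); ++i) {
        if (values[i] > bestValue) {
            best.clear();      // a strictly better value invalidates earlier candidates
            bestValue = values[i];
        }
        if (values[i] == bestValue) {
            best.push_back(i); // ties keep all optimal indices
        }
    }
    return best;
}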
@@ -1394,7 +850,7 @@ namespace storm {
         template<typename ValueType, typename RewardModelType>
         void ApproximatePOMDPModelchecker<ValueType, RewardModelType>::constructBeliefGrid(
                 storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                std::set<uint32_t> target_observations, uint64_t gridResolution,
+                std::set<uint32_t> const &target_observations, uint64_t gridResolution,
                 std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
                 std::vector<storm::pomdp::Belief<ValueType>> &grid, std::vector<bool> &beliefIsKnown,
                 uint64_t nextId) {
@@ -1440,9 +896,7 @@ namespace storm {
                                                   storm::utility::convertNumber<ValueType>(gridResolution);

                         storm::pomdp::Belief<ValueType> belief = {newId, observation, distribution};
-                        STORM_LOG_TRACE(
-                                "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation)
-                                              << ")," << distribution << "]");
+                        STORM_LOG_TRACE("Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << distribution << "]");
                         beliefList.push_back(belief);
                         grid.push_back(belief);
                         beliefIsKnown.push_back(isTarget);
@@ -1576,7 +1030,8 @@ namespace storm {
         template<typename ValueType, typename RewardModelType>
         uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterActionAndObservation(
                 storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
-                std::vector<bool> &beliefIsTarget, std::set<uint32_t> &targetObservations, storm::pomdp::Belief<ValueType> belief, uint64_t actionIndex, uint32_t observation,
+                std::vector<bool> &beliefIsTarget, std::set<uint32_t> const &targetObservations, storm::pomdp::Belief<ValueType> belief, uint64_t actionIndex,
+                uint32_t observation,
                 uint64_t id) {
             storm::utility::Stopwatch distrWatch(true);
             std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates()); //, storm::utility::zero<ValueType>());
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 1222d0b1f..6d8b50d43 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -27,24 +27,52 @@ namespace storm {
             std::unique_ptr<POMDPCheckResult<ValueType>>
             computeReachabilityProbabilityOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                              std::set<uint32_t> targetObservations, bool min,
+                                              std::set<uint32_t> const &targetObservations, bool min,
                                               uint64_t gridResolution);

             std::unique_ptr<POMDPCheckResult<ValueType>>
-            computeReachabilityRewardOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, std::set<uint32_t> targetObservations, bool min,
+            computeReachabilityRewardOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, std::set<uint32_t> const &targetObservations, bool min,
                                          uint64_t gridResolution);

             std::unique_ptr<POMDPCheckResult<ValueType>>
             computeReachabilityProbability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                           std::set<uint32_t> targetObservations, bool min,
+                                           std::set<uint32_t> const &targetObservations, bool min,
                                            uint64_t gridResolution);

             std::unique_ptr<POMDPCheckResult<ValueType>>
             computeReachabilityReward(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                      std::set<uint32_t> targetObservations, bool min,
+                                      std::set<uint32_t> const &targetObservations, bool min,
                                       uint64_t gridResolution);

         private:
+            /**
+             * Shared helper that computes the reachability value (probability or expected reward)
+             * while generating the reachable part of the belief grid on-the-fly.
+             *
+             * @param pomdp the POMDP to check
+             * @param targetObservations the set of observations that identify target states
+             * @param min true iff the minimal value is to be computed
+             * @param gridResolution the resolution of the belief grid
+             * @param computeRewards true iff expected rewards are computed instead of reachability probabilities
+             * @return the approximation result
+             */
+            std::unique_ptr<POMDPCheckResult<ValueType>>
+            computeReachabilityOTF(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                   std::set<uint32_t> const &targetObservations, bool min,
+                                   uint64_t gridResolution, bool computeRewards);
+
+            /**
+             * Shared helper that computes the reachability value (probability or expected reward)
+             * on the belief grid constructed up front.
+             *
+             * @param pomdp the POMDP to check
+             * @param targetObservations the set of observations that identify target states
+             * @param min true iff the minimal value is to be computed
+             * @param gridResolution the resolution of the belief grid
+             * @param computeRewards true iff expected rewards are computed instead of reachability probabilities
+             * @return the approximation result
+             */
+            std::unique_ptr<POMDPCheckResult<ValueType>>
+            computeReachability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                std::set<uint32_t> const &targetObservations, bool min,
+                                uint64_t gridResolution, bool computeRewards);

             /**
              * TODO
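With the two private helpers in place, the four public entry points presumably reduce to one-line forwarders. This is not shown in the excerpt, but a plausible implementation is:

template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbabilityOTF(
        storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
        std::set<uint32_t> const &targetObservations, bool min, uint64_t gridResolution) {
    // Reachability probability: shared routine without reward computation.
    return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, false);
}

template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(
        storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
        std::set<uint32_t> const &targetObservations, bool min, uint64_t gridResolution) {
    // Expected reward: shared routine with reward computation enabled.
    return computeReachabilityOTF(pomdp, targetObservations, min, gridResolution, true);
}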
@@ -58,15 +86,38 @@ namespace storm {
              * @param min
              * @return
              */
-            uint64_t extractBestAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                       std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
-                                       std::vector<bool> &beliefIsTarget,
-                                       std::set<uint32_t> &target_observations,
-                                       std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
-                                       std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
-                                       std::map<uint64_t, ValueType> &result,
-                                       uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId,
-                                       bool min);
+            std::vector<uint64_t> extractBestActions(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                     std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                                                     std::vector<bool> &beliefIsTarget,
+                                                     std::set<uint32_t> const &target_observations,
+                                                     std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+                                                     std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+                                                     std::map<uint64_t, ValueType> &result,
+                                                     uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId,
+                                                     bool min);
+
+            /**
+             * Variant of extractBestActions that extracts only a single optimal action for the given belief.
+             * @param pomdp
+             * @param beliefList
+             * @param observationProbabilities
+             * @param nextBelieves
+             * @param result
+             * @param gridResolution
+             * @param currentBeliefId
+             * @param nextId
+             * @param min
+             * @return
+             */
+            std::vector<uint64_t> extractBestAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                    std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                                                    std::vector<bool> &beliefIsTarget,
+                                                    std::set<uint32_t> const &target_observations,
+                                                    std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+                                                    std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+                                                    std::map<uint64_t, ValueType> &result,
+                                                    uint64_t gridResolution, uint64_t currentBeliefId, uint64_t nextId,
+                                                    bool min);

             /**
              * TODO
@@ -83,25 +134,15 @@ namespace storm {
              * @param min
              * @return
              */
-            ValueType computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                                        std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
-                                                        std::vector<bool> &beliefIsTarget,
-                                                        std::set<uint32_t> &targetObservations,
-                                                        std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
-                                                        std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
-                                                        std::map<uint64_t, ValueType> &result,
-                                                        std::map<uint64_t, uint64_t> chosenActions,
-                                                        uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward);
-
-            ValueType computeUnderapproximationWithMDP(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                                       std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
-                                                       std::vector<bool> &beliefIsTarget,
-                                                       std::set<uint32_t> &targetObservations,
-                                                       std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
-                                                       std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
-                                                       std::map<uint64_t, ValueType> &result,
-                                                       std::map<uint64_t, uint64_t> chosenActions,
-                                                       uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards);
+            ValueType computeUnderapproximation(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                                                std::vector<bool> &beliefIsTarget,
+                                                std::set<uint32_t> const &targetObservations,
+                                                std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+                                                std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+                                                std::map<uint64_t, ValueType> &result,
+                                                std::map<uint64_t, std::vector<uint64_t>> chosenActions,
+                                                uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward, bool generateMdp);

             /**
              *
@@ -131,7 +172,7 @@ namespace storm {
              *
              */
             void constructBeliefGrid(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
-                                     std::set<uint32_t> target_observations, uint64_t gridResolution,
+                                     std::set<uint32_t> const &target_observations, uint64_t gridResolution,
                                      std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
                                      std::vector<storm::pomdp::Belief<ValueType>> &grid,
                                      std::vector<bool> &beliefIsKnown, uint64_t nextId);
@@ -165,7 +206,7 @@ namespace storm {
                     storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                     std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
                     std::vector<bool> &beliefIsTarget,
-                    std::set<uint32_t> &targetObservations,
+                    std::set<uint32_t> const &targetObservations,
                     storm::pomdp::Belief<ValueType> belief,
                     uint64_t actionIndex, uint32_t observation, uint64_t id);

@@ -191,15 +232,29 @@ namespace storm {
             uint64_t getBeliefIdInVector(std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
                                          std::vector<ValueType> probabilities);

-            storm::storage::SparseMatrix<ValueType>
-            buildTransitionMatrix(std::vector<std::map<uint64_t, ValueType>> transitions);
-
-            storm::storage::SparseMatrix<ValueType>
-            buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions);
+            /**
+             * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability)
+             * @return sparseMatrix representing the transitions
+             */
+            storm::storage::SparseMatrix<ValueType> buildTransitionMatrix(std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions);

             ValueType getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t action, storm::pomdp::Belief<ValueType> belief);

+            ValueType
+            overApproximationValueIteration(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                                            std::vector<storm::pomdp::Belief<ValueType>> &beliefGrid, std::vector<bool> &beliefIsTarget,
+                                            std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+                                            std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+                                            std::map<uint64_t, std::vector<ValueType>> &beliefActionRewards,
+                                            std::map<uint64_t, std::vector<std::vector<ValueType>>> &subSimplexCache,
+                                            std::map<uint64_t, std::vector<ValueType>> &lambdaCache, std::map<uint64_t, ValueType> &result,
+                                            std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
+                                            uint64_t gridResolution, bool min, bool computeRewards);
+
             storm::utility::ConstantsComparator<ValueType> cc;
+            double precision;
+            bool useMdp;
+            uint64_t maxIterations;
         };
     }
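For orientation, a call site could look as follows. The surrounding namespace, the defaulted RewardModelType template argument, and the construction of the POMDP are assumptions for illustration, not taken from the patch:

#include <cstdint>
#include <set>
#include "storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h"

// Hypothetical driver; `pomdp` must be a previously parsed or built sparse POMDP.
void checkPomdp(storm::models::sparse::Pomdp<double> const &pomdp) {
    storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double> checker;
    std::set<uint32_t> targetObservations = {3};  // observations that mark target states
    auto result = checker.computeReachabilityProbabilityOTF(pomdp, targetObservations,
                                                            /*min=*/false, /*gridResolution=*/10);
    // `result` holds the computed approximation of the maximal reachability probability.
}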