diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 1a2a24a38..8d43e3977 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -414,12 +414,17 @@ namespace storm { template std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set targetObservations, bool min, uint64_t gridResolution) { + ApproximatePOMDPModelchecker::computeReachability(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution, + bool computeRewards) { storm::utility::Stopwatch beliefGridTimer(true); bool finished = false; uint64_t iteration = 0; + if (computeRewards) { + RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); + } + std::vector> beliefList; std::vector beliefIsTarget; uint64_t nextId = 0; @@ -427,13 +432,10 @@ namespace storm { storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); ++nextId; beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - + beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); std::vector> beliefGrid; - constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, - nextId); + constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId); nextId = beliefList.size(); beliefGridTimer.stop(); @@ -448,24 +450,28 @@ namespace storm { std::map>> observationProbabilities; // current ID -> action -> next ID std::map>> nextBelieves; + // current ID -> action -> reward + std::map> beliefActionRewards; storm::utility::Stopwatch nextBeliefGeneration(true); for (size_t i = 0; i < beliefGrid.size(); ++i) { auto currentBelief = beliefGrid[i]; bool isTarget = beliefIsTarget[currentBelief.id]; if (isTarget) { - result.emplace(std::make_pair(currentBelief.id, storm::utility::one())); - result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::one())); + result.emplace(std::make_pair(currentBelief.id, computeRewards ? storm::utility::zero() : storm::utility::one())); + result_backup.emplace(std::make_pair(currentBelief.id, computeRewards ? storm::utility::zero() : storm::utility::one())); } else { result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - //TODO put this in extra function - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); + // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative + uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); + uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); std::vector> observationProbabilitiesInAction(numChoices); std::vector> nextBelievesInAction(numChoices); + std::vector actionRewardsInState(numChoices); + for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action); std::map actionObservationBelieves; @@ -473,23 +479,24 @@ namespace storm { uint32_t observation = iter->first; // THIS CALL IS SLOW // TODO speed this up - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - currentBelief, - action, - observation, - nextId); + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, currentBelief, + action, observation, nextId); nextId = beliefList.size(); } observationProbabilitiesInAction[action] = actionObservationProbabilities; nextBelievesInAction[action] = actionObservationBelieves; + if (computeRewards) { + actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), + currentBelief); + } } - observationProbabilities.emplace( - std::make_pair(currentBelief.id, observationProbabilitiesInAction)); + observationProbabilities.emplace(std::make_pair(currentBelief.id, observationProbabilitiesInAction)); nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); + if (computeRewards) { + beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState)); + } } + } nextBeliefGeneration.stop(); @@ -508,16 +515,14 @@ namespace storm { bool isTarget = beliefIsTarget[currentBelief.id]; if (!isTarget) { // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); + uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front()); // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); + ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); std::vector chosenActionIndices; ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = storm::utility::zero(); + currentValue = computeRewards ? beliefActionRewards[currentBelief.id][action] : storm::utility::zero(); for (auto iter = observationProbabilities[currentBelief.id][action].begin(); iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { uint32_t observation = iter->first; @@ -530,8 +535,8 @@ namespace storm { subSimplex = subSimplexCache[nextBelief.id]; lambdas = lambdaCache[nextBelief.id]; } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(nextBelief.probabilities, + gridResolution); subSimplex = temp.first; lambdas = temp.second; subSimplexCache[nextBelief.id] = subSimplex; @@ -540,16 +545,15 @@ namespace storm { auto sum = storm::utility::zero(); for (size_t j = 0; j < lambdas.size(); ++j) { if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); } } + currentValue += iter->second * sum; } // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { @@ -561,7 +565,7 @@ namespace storm { result[currentBelief.id] = chosenValue; chosenActions[currentBelief.id] = chosenActionIndices; // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { + if (!cc.isEqual(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id])) { improvement = true; } } @@ -580,17 +584,14 @@ namespace storm { beliefGrid.push_back(initialBelief); beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); + std::pair>, std::vector> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution); std::vector> initSubSimplex = temp.first; std::vector initLambdas = temp.second; auto overApprox = storm::utility::zero(); for (size_t j = 0; j < initLambdas.size(); ++j) { if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; + overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])]; } } overApproxTimer.stop(); @@ -598,9 +599,9 @@ namespace storm { // Now onto the under-approximation storm::utility::Stopwatch underApproxTimer(true); ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, false) : - computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result, - chosenActions, gridResolution, initialBelief.id, min, false); + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards) : + computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, + result, chosenActions, gridResolution, initialBelief.id, min, computeRewards); underApproxTimer.stop(); STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl @@ -608,231 +609,24 @@ namespace storm { << std::endl << "Time Underapproximation: " << underApproxTimer << std::endl); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); + return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); + } + + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, uint64_t gridResolution) { + return computeReachability(pomdp, targetObservations, min, gridResolution, false); } - //TODO This function reuses a lot of code from the probability computation, refactor to minimize code duplication! template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, std::set targetObservations, bool min, uint64_t gridResolution) { - storm::utility::Stopwatch beliefGridTimer(true); - bool finished = false; - uint64_t iteration = 0; - - RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel(); - - std::vector> beliefList; - std::vector beliefIsTarget; - uint64_t nextId = 0; - // Initial belief always has ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); - ++nextId; - beliefList.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - - - std::vector> beliefGrid; - constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, - nextId); - nextId = beliefList.size(); - beliefGridTimer.stop(); - - storm::utility::Stopwatch overApproxTimer(true); - // Belief ID -> Value - std::map result; - std::map result_backup; - // Belief ID -> ActionIndex - std::map> chosenActions; - - // Belief ID -> Observation -> Probability - std::map>> observationProbabilities; - // current ID -> action -> next ID - std::map>> nextBelieves; - // current ID -> action -> reward - std::map> beliefActionRewards; - - storm::utility::Stopwatch nextBeliefGeneration(true); - for (size_t i = 0; i < beliefGrid.size(); ++i) { - auto currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - if (!isTarget) { - //TODO put this in extra function - // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative - uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); - std::vector> observationProbabilitiesInAction(numChoices); - std::vector> nextBelievesInAction(numChoices); - - std::vector actionRewardsInState(numChoices); - - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( - pomdp, currentBelief, action); - std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); - iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, - beliefList, - beliefIsTarget, - targetObservations, - currentBelief, - action, - observation, - nextId); - nextId = beliefList.size(); - } - observationProbabilitiesInAction[action] = actionObservationProbabilities; - nextBelievesInAction[action] = actionObservationBelieves; - - actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief); - } - observationProbabilities.emplace( - std::make_pair(currentBelief.id, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); - beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState)); - } - - } - nextBeliefGeneration.stop(); - - //Use chaching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; - std::map> lambdaCache; - - STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) - // Value Iteration - while (!finished && iteration < maxIterations) { - storm::utility::Stopwatch iterationTimer(true); - STORM_LOG_DEBUG("Iteration " << iteration + 1); - bool improvement = false; - for (size_t i = 0; i < beliefGrid.size(); ++i) { - storm::pomdp::Belief currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (!isTarget) { - // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices( - pomdp.getStatesWithObservation(currentBelief.observation).front()); - // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); - std::vector chosenActionIndices; - ValueType currentValue; - - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = beliefActionRewards[currentBelief.id][action]; - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - // cache the values to not always re-calculate - std::vector> subSimplex; - std::vector lambdas; - if (subSimplexCache.count(nextBelief.id) > 0) { - subSimplex = subSimplexCache[nextBelief.id]; - lambdas = lambdaCache[nextBelief.id]; - } else { - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - nextBelief.probabilities, gridResolution); - subSimplex = temp.first; - lambdas = temp.second; - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; - } - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at( - getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); - } - } - - currentValue += iter->second * sum; - } - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && - cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { - chosenActionIndices.clear(); - } - chosenActionIndices.push_back(action); - } - } - - result[currentBelief.id] = chosenValue; - - chosenActions[currentBelief.id] = chosenActionIndices; - // Check if the iteration brought an improvement - if (cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id]) || - cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id])) { - improvement = true; - } - } - } - finished = !improvement; - // back up - result_backup = result; - - ++iteration; - iterationTimer.stop(); - STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); - } - - STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - - beliefGrid.push_back(initialBelief); - beliefIsTarget.push_back( - targetObservations.find(initialBelief.observation) != targetObservations.end()); - - std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - initialBelief.probabilities, gridResolution); - std::vector> initSubSimplex = temp.first; - std::vector initLambdas = temp.second; - - auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (initLambdas[j] != storm::utility::zero()) { - overApprox += initLambdas[j] * - result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, - initSubSimplex[j])]; - } - } - overApproxTimer.stop(); - - // Now onto the under-approximation - storm::utility::Stopwatch underApproxTimer(true); - ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, true) : - computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, true); - underApproxTimer.stop(); - - STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl - << "Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - - return std::make_unique>( - POMDPCheckResult{overApprox, underApprox}); + return computeReachability(pomdp, targetObservations, min, gridResolution, true); } template diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 78c2b5536..cdf19dcd5 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -59,6 +59,20 @@ namespace storm { std::set targetObservations, bool min, uint64_t gridResolution, bool computeRewards); + /** + * + * @param pomdp + * @param targetObservations + * @param min + * @param gridResolution + * @param computeRewards + * @return + */ + std::unique_ptr> + computeReachability(storm::models::sparse::Pomdp const &pomdp, + std::set targetObservations, bool min, + uint64_t gridResolution, bool computeRewards); + /** * TODO * @param pomdp