diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 1c40e96f6..7a51609ba 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -40,8 +40,6 @@ namespace storm { template ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp const& pomdp, Options options) : pomdp(pomdp), options(options) { cc = storm::utility::ConstantsComparator(storm::utility::convertNumber(this->options.numericPrecision), false); - useMdp = true; - maxIterations = 1000; } template @@ -840,121 +838,6 @@ namespace storm { refinementComponents->overApproxBeliefStateMap, underApproxComponents->underApproxBeliefStateMap}); } - template - ValueType - ApproximatePOMDPModelchecker::overApproximationValueIteration(std::vector> &beliefList, - std::vector> &beliefGrid, - std::vector &beliefIsTarget, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map> &beliefActionRewards, - std::map>> &subSimplexCache, - std::map> &lambdaCache, - std::map &result, - std::map> &chosenActions, - uint64_t gridResolution, bool min, bool computeRewards) { - std::map result_backup = result; - uint64_t iteration = 0; - bool finished = false; - // Value Iteration - while (!finished && iteration < maxIterations) { - storm::utility::Stopwatch iterationTimer(true); - STORM_LOG_DEBUG("Iteration " << iteration + 1); - bool improvement = false; - for (size_t i = 0; i < beliefGrid.size(); ++i) { - storm::pomdp::Belief currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (!isTarget) { - // we can take any state with the observation as they have the same number of choices - uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front()); - // Initialize the values for the value iteration - ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); - std::vector chosenActionIndices; - ValueType currentValue; - - for (uint64_t action = 0; action < numChoices; ++action) { - currentValue = computeRewards ? beliefActionRewards[currentBelief.id][action] : storm::utility::zero(); - for (auto iter = observationProbabilities[currentBelief.id][action].begin(); - iter != observationProbabilities[currentBelief.id][action].end(); ++iter) { - uint32_t observation = iter->first; - storm::pomdp::Belief nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]]; - // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief - // cache the values to not always re-calculate - std::vector> subSimplex; - std::vector lambdas; - if (options.cacheSubsimplices && subSimplexCache.count(nextBelief.id) > 0) { - subSimplex = subSimplexCache[nextBelief.id]; - lambdas = lambdaCache[nextBelief.id]; - } else { - auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution, pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - if (options.cacheSubsimplices) { - subSimplexCache[nextBelief.id] = subSimplex; - lambdaCache[nextBelief.id] = lambdas; - } - } - auto sum = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); - } - } - - currentValue += iter->second * sum; - } - // Update the selected actions - if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { - chosenValue = currentValue; - if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { - chosenActionIndices.clear(); - } - chosenActionIndices.push_back(action); - } - } - - result[currentBelief.id] = chosenValue; - - chosenActions[currentBelief.id] = chosenActionIndices; - // Check if the iteration brought an improvement - if (!cc.isEqual(result_backup[currentBelief.id], result[currentBelief.id])) { - improvement = true; - } - } - } - finished = !improvement; - // back up - result_backup = result; - - ++iteration; - iterationTimer.stop(); - STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl); - } - - STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl); - - std::vector initialLambda; - std::vector> initialSubsimplex; - if (options.cacheSubsimplices) { - initialLambda = lambdaCache[0]; - initialSubsimplex = subSimplexCache[0]; - } else { - auto temp = computeSubSimplexAndLambdas(beliefList[0].probabilities, gridResolution, pomdp.getNumberOfStates()); - initialSubsimplex = temp.first; - initialLambda = temp.second; - } - - auto overApprox = storm::utility::zero(); - for (size_t j = 0; j < initialLambda.size(); ++j) { - if (initialLambda[j] != storm::utility::zero()) { - overApprox += initialLambda[j] * result_backup[getBeliefIdInVector(beliefGrid, beliefList[0].observation, initialSubsimplex[j])]; - } - } - return overApprox; - } - template std::unique_ptr> ApproximatePOMDPModelchecker::computeReachabilityRewardOTF(std::set const &targetObservations, bool min) { @@ -969,135 +852,6 @@ namespace storm { return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); } - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachability(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, uint64_t gridResolution, - bool computeRewards) { - storm::utility::Stopwatch beliefGridTimer(true); - - std::vector> beliefList; - std::vector beliefIsTarget; - uint64_t nextId = 0; - // Initial belief always has ID 0 - storm::pomdp::Belief initialBelief = getInitialBelief(nextId); - ++nextId; - beliefList.push_back(initialBelief); - beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - - std::vector> beliefGrid; - constructBeliefGrid(targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId); - nextId = beliefList.size(); - beliefGridTimer.stop(); - - storm::utility::Stopwatch overApproxTimer(true); - // Belief ID -> Value - std::map result; - // Belief ID -> ActionIndex - std::map> chosenActions; - - // Belief ID -> action -> Observation -> Probability - std::map>> observationProbabilities; - // current ID -> action -> next ID - std::map>> nextBelieves; - // current ID -> action -> reward - std::map> beliefActionRewards; - //Use caching to avoid multiple computation of the subsimplices and lambdas - std::map>> subSimplexCache; - std::map> lambdaCache; - - auto temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution, pomdp.getNumberOfStates()); - if (options.cacheSubsimplices) { - subSimplexCache[0] = temp.first; - lambdaCache[0] = temp.second; - } - - storm::utility::Stopwatch nextBeliefGeneration(true); - for (size_t i = 0; i < beliefGrid.size(); ++i) { - auto currentBelief = beliefGrid[i]; - bool isTarget = beliefIsTarget[currentBelief.id]; - if (isTarget) { - result.emplace(std::make_pair(currentBelief.id, computeRewards ? storm::utility::zero() : storm::utility::one())); - } else { - result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); - //TODO put this in extra function - - // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative - uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); - std::vector> observationProbabilitiesInAction(numChoices); - std::vector> nextBelievesInAction(numChoices); - - std::vector actionRewardsInState(numChoices); - - for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(currentBelief, action); - std::map actionObservationBelieves; - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, currentBelief, - action, observation, nextId); - nextId = beliefList.size(); - } - observationProbabilitiesInAction[action] = actionObservationProbabilities; - nextBelievesInAction[action] = actionObservationBelieves; - if (computeRewards) { - actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief); - } - } - observationProbabilities.emplace(std::make_pair(currentBelief.id, observationProbabilitiesInAction)); - nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction)); - if (computeRewards) { - beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState)); - } - } - - } - nextBeliefGeneration.stop(); - - STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl) - // Value Iteration - auto overApprox = overApproximationValueIteration(beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards, - subSimplexCache, lambdaCache, - result, chosenActions, gridResolution, min, computeRewards); - overApproxTimer.stop(); - - // Now onto the under-approximation - storm::utility::Stopwatch underApproxTimer(true); - /*ValueType underApprox = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, - result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp);*/ - underApproxTimer.stop(); - auto underApprox = storm::utility::zero(); - STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl - << "Time Overapproximation: " << overApproxTimer - << std::endl - << "Time Underapproximation: " << underApproxTimer - << std::endl); - STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - - return std::make_unique>(POMDPCheckResult{overApprox, underApprox}); - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution) { - return computeReachability(pomdp, targetObservations, min, gridResolution, false); - } - - template - std::unique_ptr> - ApproximatePOMDPModelchecker::computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution) { - return computeReachability(pomdp, targetObservations, min, gridResolution, true); - } - template std::unique_ptr> ApproximatePOMDPModelchecker::computeUnderapproximation(std::vector> &beliefList, diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index e5cb6ea1b..5805449f4 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -99,37 +99,6 @@ namespace storm { std::unique_ptr> computeReachabilityRewardOTF(std::set const &targetObservations, bool min); - // TODO: Check if this is obsolete - /** - * Compute the reachability probability for given target observations on a POMDP for the given resolution only. - * Static state space generation is used for the overapproximation, i.e. the whole grid is generated - * - * @param pomdp the POMDP to be checked - * @param targetObservations the set of observations to be reached - * @param min true if the minimum probability is to be computed - * @param gridResolution the initial grid resolution - * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values - */ - std::unique_ptr> - computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution); - - // TODO: Check if this is obsolete - /** - * Compute the reachability rewards for given target observations on a POMDP for the given resolution only. - * Static state space generation is used for the overapproximation, i.e. the whole grid is generated - * - * @param targetObservations the set of observations to be reached - * @param min true if the minimum rewards are to be computed - * @param gridResolution the initial grid resolution - * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values - */ - std::unique_ptr> - computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution); - private: /** * Helper method to compute the inital step of the refinement loop @@ -173,21 +142,6 @@ namespace storm { boost::optional> overApproximationMap = boost::none, boost::optional> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200); - // TODO: Check if this is obsolete - /** - * Helper method to compute reachability properties using static state space generation - * - * @param targetObservations set of target observations - * @param min true if minimum value is to be computed - * @param gridResolution the resolution of the grid to be used - * @param computeRewards true if rewards are to be computed, false if probability is computed - * @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values - */ - std::unique_ptr> - computeReachability(storm::models::sparse::Pomdp const &pomdp, - std::set const &targetObservations, bool min, - uint64_t gridResolution, bool computeRewards); - /** * Helper to compute an underapproximation of the reachability property. * The implemented method unrolls the belief support of the given POMDP up to a given number of belief states. @@ -314,43 +268,9 @@ namespace storm { ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief); - /** - * Helper method for value iteration on data structures representing the belief grid - * This is very close to the method implemented in PRISM POMDP - * - * @param pomdp The POMDP - * @param beliefList data structure to store all generated beliefs - * @param beliefGrid data structure to store references to the grid beliefs specifically - * @param beliefIsTarget vector containing true if the corresponding belief in the beleif list is a target belief - * @param observationProbabilities data structure containing for each belief and possible action the probability to go to a state with a given observation - * @param nextBelieves data structure containing for each belief the successor belief after performing an action and observing a given observation - * @param beliefActionRewards data structure containing for each belief and possible action the reward for performing the action - * @param subSimplexCache caching data structure to store already computed subsimplices - * @param lambdaCache caching data structure to store already computed lambda values - * @param result data structure to store result values for each grid state - * @param chosenActions data structure to store the action(s) that lead to the computed result value - * @param gridResolution the resolution of the grid - * @param min true if minimal values are to be computed - * @param computeRewards true if rewards are to be computed - * @return the resulting probability/reward in the initial state - */ - ValueType - overApproximationValueIteration(std::vector> &beliefList, - std::vector> &beliefGrid, std::vector &beliefIsTarget, - std::map>> &observationProbabilities, - std::map>> &nextBelieves, - std::map> &beliefActionRewards, - std::map>> &subSimplexCache, - std::map> &lambdaCache, std::map &result, - std::map> &chosenActions, - uint64_t gridResolution, bool min, bool computeRewards); - storm::models::sparse::Pomdp const& pomdp; Options options; storm::utility::ConstantsComparator cc; - // TODO: these should be obsolete, right? - bool useMdp; - uint64_t maxIterations; }; }