
Removal of obsolete code

tempestpy_adaptions
Alexander Bork, 5 years ago
commit 8e30e27eb9
  1. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (246 changed lines)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (80 changed lines)

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (246 changed lines)

@@ -40,8 +40,6 @@ namespace storm {
template<typename ValueType, typename RewardModelType>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::ApproximatePOMDPModelchecker(storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp, Options options) : pomdp(pomdp), options(options) {
cc = storm::utility::ConstantsComparator<ValueType>(storm::utility::convertNumber<ValueType>(this->options.numericPrecision), false);
useMdp = true;
maxIterations = 1000;
}
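The constructor wires the numeric comparator to options.numericPrecision; cacheSubsimplices is read further down in this file. A minimal configuration sketch, assuming Options is a nested struct of the checker living in the storm::pomdp::modelchecker namespace and that 1e-9 is a sensible precision (neither detail is confirmed by this diff):

// Sketch only: the member names follow the usages visible in this commit;
// the nesting, namespace, and default values are assumptions.
using Checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<
    double, storm::models::sparse::StandardRewardModel<double>>;
Checker::Options options;
options.numericPrecision = 1e-9;   // tolerance handed to the ConstantsComparator
options.cacheSubsimplices = true;  // reuse Lovejoy subsimplices/lambdas across backups
Checker checker(pomdp, options);   // pomdp: a storm::models::sparse::Pomdp, built elsewhere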
template<typename ValueType, typename RewardModelType>
@@ -840,121 +838,6 @@ namespace storm {
refinementComponents->overApproxBeliefStateMap, underApproxComponents->underApproxBeliefStateMap});
}
template<typename ValueType, typename RewardModelType>
ValueType
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::overApproximationValueIteration(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<storm::pomdp::Belief<ValueType>> &beliefGrid,
std::vector<bool> &beliefIsTarget,
std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
std::map<uint64_t, std::vector<ValueType>> &beliefActionRewards,
std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> &subSimplexCache,
std::map<uint64_t, std::vector<ValueType>> &lambdaCache,
std::map<uint64_t, ValueType> &result,
std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
uint64_t gridResolution, bool min, bool computeRewards) {
std::map<uint64_t, ValueType> result_backup = result;
uint64_t iteration = 0;
bool finished = false;
// Value Iteration
while (!finished && iteration < maxIterations) {
storm::utility::Stopwatch iterationTimer(true);
STORM_LOG_DEBUG("Iteration " << iteration + 1);
bool improvement = false;
for (size_t i = 0; i < beliefGrid.size(); ++i) {
storm::pomdp::Belief<ValueType> currentBelief = beliefGrid[i];
bool isTarget = beliefIsTarget[currentBelief.id];
if (!isTarget) {
// we can take any state with the observation as they have the same number of choices
uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front());
// Initialize the values for the value iteration
ValueType chosenValue = min ? storm::utility::infinity<ValueType>() : -storm::utility::infinity<ValueType>();
std::vector<uint64_t> chosenActionIndices;
ValueType currentValue;
for (uint64_t action = 0; action < numChoices; ++action) {
currentValue = computeRewards ? beliefActionRewards[currentBelief.id][action] : storm::utility::zero<ValueType>();
for (auto iter = observationProbabilities[currentBelief.id][action].begin();
iter != observationProbabilities[currentBelief.id][action].end(); ++iter) {
uint32_t observation = iter->first;
storm::pomdp::Belief<ValueType> nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]];
// compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
// cache the values to avoid recomputing them
std::vector<std::map<uint64_t, ValueType>> subSimplex;
std::vector<ValueType> lambdas;
if (options.cacheSubsimplices && subSimplexCache.count(nextBelief.id) > 0) {
subSimplex = subSimplexCache[nextBelief.id];
lambdas = lambdaCache[nextBelief.id];
} else {
auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution, pomdp.getNumberOfStates());
subSimplex = temp.first;
lambdas = temp.second;
if (options.cacheSubsimplices) {
subSimplexCache[nextBelief.id] = subSimplex;
lambdaCache[nextBelief.id] = lambdas;
}
}
auto sum = storm::utility::zero<ValueType>();
for (size_t j = 0; j < lambdas.size(); ++j) {
if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
}
}
currentValue += iter->second * sum;
}
// Update the selected actions
if ((min && cc.isLess(storm::utility::zero<ValueType>(), chosenValue - currentValue)) ||
(!min && cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
chosenValue = currentValue;
if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
chosenActionIndices.clear();
}
chosenActionIndices.push_back(action);
}
}
result[currentBelief.id] = chosenValue;
chosenActions[currentBelief.id] = chosenActionIndices;
// Check if the iteration brought an improvement
if (!cc.isEqual(result_backup[currentBelief.id], result[currentBelief.id])) {
improvement = true;
}
}
}
finished = !improvement;
// back up
result_backup = result;
++iteration;
iterationTimer.stop();
STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl);
}
STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl);
std::vector<ValueType> initialLambda;
std::vector<std::map<uint64_t, ValueType>> initialSubsimplex;
if (options.cacheSubsimplices) {
initialLambda = lambdaCache[0];
initialSubsimplex = subSimplexCache[0];
} else {
auto temp = computeSubSimplexAndLambdas(beliefList[0].probabilities, gridResolution, pomdp.getNumberOfStates());
initialSubsimplex = temp.first;
initialLambda = temp.second;
}
auto overApprox = storm::utility::zero<ValueType>();
for (size_t j = 0; j < initialLambda.size(); ++j) {
if (initialLambda[j] != storm::utility::zero<ValueType>()) {
overApprox += initialLambda[j] * result_backup[getBeliefIdInVector(beliefGrid, beliefList[0].observation, initialSubsimplex[j])];
}
}
return overApprox;
}
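computeSubSimplexAndLambdas itself is not part of this diff. For orientation, here is a self-contained sketch of the Freudenthal triangulation from the Lovejoy paper referenced above, which it presumably implements: the resolution parameter and the vertex/lambda return shape are taken from the calls above, while the body is the textbook construction and should not be read as Storm's exact code.

// Sketch of Lovejoy's grid approximation: given a belief b and resolution N,
// return the grid beliefs spanning the sub-simplex containing b together with
// convex coefficients (lambdas) such that b = sum_k lambda_k * vertex_k.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <numeric>
#include <utility>
#include <vector>

std::pair<std::vector<std::vector<double>>, std::vector<double>>
subSimplexAndLambdas(std::vector<double> const& belief, uint64_t resolution) {
    size_t n = belief.size();
    if (n == 1) {
        return {{{1.0}}, {1.0}};  // degenerate single-state belief
    }
    // Lovejoy's coordinate change: x[i] = N * sum_{j >= i} b[j], so x is
    // non-increasing and x[0] == N. Split into integer and fractional parts.
    std::vector<double> x(n), v(n), d(n);
    double suffixSum = 0.0;
    for (size_t i = n; i-- > 0;) {
        suffixSum += belief[i];
        x[i] = resolution * suffixSum;
        v[i] = std::floor(x[i]);
        d[i] = x[i] - v[i];
    }
    // Order coordinates 1..n-1 by descending fractional part.
    std::vector<size_t> perm(n - 1);
    std::iota(perm.begin(), perm.end(), static_cast<size_t>(1));
    std::sort(perm.begin(), perm.end(), [&](size_t a, size_t b) { return d[a] > d[b]; });
    // Walk the Freudenthal simplex: vertex k is v plus unit steps on the
    // k largest fractional coordinates; map each grid point back to a belief
    // via b'[i] = (q[i] - q[i+1]) / N.
    std::vector<std::vector<double>> vertices;
    std::vector<double> q = v;
    for (size_t k = 0; k < n; ++k) {
        if (k > 0) {
            q[perm[k - 1]] += 1.0;
        }
        std::vector<double> vertexBelief(n);
        for (size_t i = 0; i < n; ++i) {
            double next = (i + 1 < n) ? q[i + 1] : 0.0;
            vertexBelief[i] = (q[i] - next) / resolution;
        }
        vertices.push_back(std::move(vertexBelief));
    }
    // Convex coefficients: consecutive differences of the sorted fractions.
    std::vector<double> lambdas(n);
    lambdas[0] = 1.0 - d[perm[0]];
    for (size_t k = 1; k + 1 < n; ++k) {
        lambdas[k] = d[perm[k - 1]] - d[perm[k]];
    }
    lambdas[n - 1] = d[perm[n - 2]];
    return {vertices, lambdas};
}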
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min) {
@@ -969,135 +852,6 @@ namespace storm {
return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::set<uint32_t> const &targetObservations, bool min, uint64_t gridResolution,
bool computeRewards) {
storm::utility::Stopwatch beliefGridTimer(true);
std::vector<storm::pomdp::Belief<ValueType>> beliefList;
std::vector<bool> beliefIsTarget;
uint64_t nextId = 0;
// Initial belief always has ID 0
storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(nextId);
++nextId;
beliefList.push_back(initialBelief);
beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
constructBeliefGrid(targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId);
nextId = beliefList.size();
beliefGridTimer.stop();
storm::utility::Stopwatch overApproxTimer(true);
// Belief ID -> Value
std::map<uint64_t, ValueType> result;
// Belief ID -> ActionIndex
std::map<uint64_t, std::vector<uint64_t>> chosenActions;
// Belief ID -> action -> Observation -> Probability
std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
// current ID -> action -> next ID
std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> nextBelieves;
// current ID -> action -> reward
std::map<uint64_t, std::vector<ValueType>> beliefActionRewards;
//Use caching to avoid recomputing the subsimplices and lambdas
std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> subSimplexCache;
std::map<uint64_t, std::vector<ValueType>> lambdaCache;
auto temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution, pomdp.getNumberOfStates());
if (options.cacheSubsimplices) {
subSimplexCache[0] = temp.first;
lambdaCache[0] = temp.second;
}
storm::utility::Stopwatch nextBeliefGeneration(true);
for (size_t i = 0; i < beliefGrid.size(); ++i) {
auto currentBelief = beliefGrid[i];
bool isTarget = beliefIsTarget[currentBelief.id];
if (isTarget) {
result.emplace(std::make_pair(currentBelief.id, computeRewards ? storm::utility::zero<ValueType>() : storm::utility::one<ValueType>()));
} else {
result.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
//TODO put this in extra function
// As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative
uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
std::vector<ValueType> actionRewardsInState(numChoices);
for (uint64_t action = 0; action < numChoices; ++action) {
std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(currentBelief, action);
std::map<uint32_t, uint64_t> actionObservationBelieves;
for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
uint32_t observation = iter->first;
// THIS CALL IS SLOW
// TODO speed this up
actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, currentBelief,
action, observation, nextId);
nextId = beliefList.size();
}
observationProbabilitiesInAction[action] = actionObservationProbabilities;
nextBelievesInAction[action] = actionObservationBelieves;
if (computeRewards) {
actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
currentBelief);
}
}
observationProbabilities.emplace(std::make_pair(currentBelief.id, observationProbabilitiesInAction));
nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction));
if (computeRewards) {
beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState));
}
}
}
nextBeliefGeneration.stop();
STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl)
// Value Iteration
auto overApprox = overApproximationValueIteration(beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves, beliefActionRewards,
subSimplexCache, lambdaCache,
result, chosenActions, gridResolution, min, computeRewards);
overApproxTimer.stop();
// Now onto the under-approximation
storm::utility::Stopwatch underApproxTimer(true);
/*ValueType underApprox = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp);*/
underApproxTimer.stop();
auto underApprox = storm::utility::zero<ValueType>();
STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl
<< "Time Overapproximation: " << overApproxTimer
<< std::endl
<< "Time Underapproximation: " << underApproxTimer
<< std::endl);
STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{overApprox, underApprox});
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityProbability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::set<uint32_t> const &targetObservations, bool min,
uint64_t gridResolution) {
return computeReachability(pomdp, targetObservations, min, gridResolution, false);
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityReward(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::set<uint32_t> const &targetObservations, bool min,
uint64_t gridResolution) {
return computeReachability(pomdp, targetObservations, min, gridResolution, true);
}
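These two wrappers, together with computeReachability above, form the static-grid entry points this commit retires; the on-the-fly variants such as computeReachabilityRewardOTF remain. A hedged sketch of driving the surviving interface, with the result-member names taken from the doc comments in the header rather than shown in this diff:

// Usage sketch; checker and options as in the constructor sketch above.
std::set<uint32_t> targetObservations{3};  // hypothetical target observation id
auto result = checker.computeReachabilityRewardOTF(targetObservations, /*min=*/false);
// POMDPCheckResult is documented to carry overApproxValue and underApproxValue:
std::cout << "over: " << result->overApproxValue
          << ", under: " << result->underApproxValue << std::endl;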
template<typename ValueType, typename RewardModelType>
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (80 changed lines)

@@ -99,37 +99,6 @@ namespace storm {
std::unique_ptr<POMDPCheckResult<ValueType>>
computeReachabilityRewardOTF(std::set<uint32_t> const &targetObservations, bool min);
// TODO: Check if this is obsolete
/**
* Compute the reachability probability for given target observations on a POMDP for the given resolution only.
* Static state space generation is used for the overapproximation, i.e. the whole grid is generated
*
* @param pomdp the POMDP to be checked
* @param targetObservations the set of observations to be reached
* @param min true if the minimum probability is to be computed
* @param gridResolution the initial grid resolution
* @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/
std::unique_ptr<POMDPCheckResult<ValueType>>
computeReachabilityProbability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::set<uint32_t> const &targetObservations, bool min,
uint64_t gridResolution);
// TODO: Check if this is obsolete
/**
* Compute the reachability rewards for given target observations on a POMDP for the given resolution only.
* Static state space generation is used for the overapproximation, i.e. the whole grid is generated
*
* @param targetObservations the set of observations to be reached
* @param min true if the minimum rewards are to be computed
* @param gridResolution the initial grid resolution
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/
std::unique_ptr<POMDPCheckResult<ValueType>>
computeReachabilityReward(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::set<uint32_t> const &targetObservations, bool min,
uint64_t gridResolution);
private:
/**
* Helper method to compute the initial step of the refinement loop
@@ -173,21 +142,6 @@ namespace storm {
boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
// TODO: Check if this is obsolete
/**
* Helper method to compute reachability properties using static state space generation
*
* @param targetObservations set of target observations
* @param min true if minimum value is to be computed
* @param gridResolution the resolution of the grid to be used
* @param computeRewards true if rewards are to be computed, false if probability is computed
* @return A struct containing the overapproximation (overApproxValue) and underapproximation (underApproxValue) values
*/
std::unique_ptr<POMDPCheckResult<ValueType>>
computeReachability(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::set<uint32_t> const &targetObservations, bool min,
uint64_t gridResolution, bool computeRewards);
/**
* Helper to compute an underapproximation of the reachability property.
* The implemented method unrolls the belief support of the given POMDP up to a given number of belief states.
@@ -314,43 +268,9 @@ namespace storm {
ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> &belief);
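For orientation, the belief-weighted reward this helper returns is presumably the standard expected immediate reward of an action under a belief (the precise reward-model lookup is outside this diff):

\[ r(b, a) \;=\; \sum_{s \in S} b(s)\, R(s, a) \]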
/**
* Helper method for value iteration on data structures representing the belief grid
* This is very close to the method implemented in PRISM POMDP
*
* @param pomdp The POMDP
* @param beliefList data structure to store all generated beliefs
* @param beliefGrid data structure to store references to the grid beliefs specifically
* @param beliefIsTarget vector containing true if the corresponding belief in the belief list is a target belief
* @param observationProbabilities data structure containing for each belief and possible action the probability to go to a state with a given observation
* @param nextBelieves data structure containing for each belief the successor belief after performing an action and observing a given observation
* @param beliefActionRewards data structure containing for each belief and possible action the reward for performing the action
* @param subSimplexCache caching data structure to store already computed subsimplices
* @param lambdaCache caching data structure to store already computed lambda values
* @param result data structure to store result values for each grid state
* @param chosenActions data structure to store the action(s) that lead to the computed result value
* @param gridResolution the resolution of the grid
* @param min true if minimal values are to be computed
* @param computeRewards true if rewards are to be computed
* @return the resulting probability/reward in the initial state
*/
ValueType
overApproximationValueIteration(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<storm::pomdp::Belief<ValueType>> &beliefGrid, std::vector<bool> &beliefIsTarget,
std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
std::map<uint64_t, std::vector<ValueType>> &beliefActionRewards,
std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> &subSimplexCache,
std::map<uint64_t, std::vector<ValueType>> &lambdaCache, std::map<uint64_t, ValueType> &result,
std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
uint64_t gridResolution, bool min, bool computeRewards);
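In the notation of the parameters above, the backup this method performs per grid belief b (see the .cpp hunk in this commit) is

\[ V_{k+1}(b) \;=\; \operatorname*{opt}_{a}\Big( r(b,a) \;+\; \sum_{z} P(z \mid b, a) \sum_{j} \lambda_j \, V_k(v_j) \Big), \qquad \operatorname{opt} \in \{\min, \max\}, \]

where the \(v_j\) and \(\lambda_j\) are the subsimplex vertices and lambda weights of the successor belief for action a and observation z, and the reward term \(r(b,a)\) is dropped when probabilities rather than rewards are computed.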
storm::models::sparse::Pomdp<ValueType, RewardModelType> const& pomdp;
Options options;
storm::utility::ConstantsComparator<ValueType> cc;
// TODO: these should be obsolete, right?
bool useMdp;
uint64_t maxIterations;
};
}
