
Added reward over-approximation

tempestpy_adaptions
Alexander Bork 5 years ago
parent commit f119e3d4c7

2 changed files:
  1. src/storm-pomdp-cli/storm-pomdp.cpp (55 changed lines)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (254 changed lines)

src/storm-pomdp-cli/storm-pomdp.cpp (55 changed lines)

@@ -183,6 +183,7 @@ int main(const int argc, const char** argv) {
}
STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException,
"The formula is not supported by the grid approximation");
STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!");
storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double> checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double>();
double overRes = storm::utility::one<double>();
@@ -203,6 +204,60 @@ int main(const int argc, const char** argv) {
pomdp = selfLoopEliminator.transform();
STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl);
}
if (pomdpSettings.isGridApproximationSet()) {
std::string rewardModelName;
storm::logic::RewardOperatorFormula const &rewFormula = formula->asRewardOperatorFormula();
if (rewFormula.hasRewardModelName()) {
rewardModelName = rewFormula.getRewardModelName();
}
storm::logic::Formula const &subformula1 = rewFormula.getSubformula();
std::set<uint32_t> targetObservationSet;
//TODO refactor
bool validFormula = false;
if (subformula1.isEventuallyFormula()) {
storm::logic::EventuallyFormula const &eventuallyFormula = subformula1.asEventuallyFormula();
storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula();
if (subformula2.isAtomicLabelFormula()) {
storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula();
validFormula = true;
std::string targetLabel = alFormula.getLabel();
auto labeling = pomdp->getStateLabeling();
for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) {
if (labeling.getStateHasLabel(targetLabel, state)) {
targetObservationSet.insert(pomdp->getObservation(state));
}
}
} else if (subformula2.isAtomicExpressionFormula()) {
validFormula = true;
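// The expression is rendered to a string and then looked up as a state label;
// this relies on the model builder having created a label named after the expression.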
std::stringstream stream;
stream << subformula2.asAtomicExpressionFormula().getExpression();
storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str());
std::string targetLabel = formula3.getLabel();
auto labeling = pomdp->getStateLabeling();
for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) {
if (labeling.getStateHasLabel(targetLabel, state)) {
targetObservationSet.insert(pomdp->getObservation(state));
}
}
}
}
STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException,
"The formula is not supported by the grid approximation");
STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!");
storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double> checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double>();
double overRes = storm::utility::one<double>();
double underRes = storm::utility::zero<double>();
std::unique_ptr<storm::pomdp::modelchecker::POMDPCheckResult<double>> result;
result = checker.computeReachabilityReward(*pomdp, targetObservationSet,
rewFormula.getOptimalityType() ==
storm::OptimizationDirection::Minimize,
pomdpSettings.getGridResolution());
overRes = result->OverapproximationValue;
underRes = result->UnderapproximationValue;
}
}
if (pomdpSettings.getMemoryBound() > 1) {
STORM_PRINT_AND_LOG("Computing the unfolding for memory bound " << pomdpSettings.getMemoryBound() << " and memory pattern '" << storm::storage::toString(pomdpSettings.getMemoryPattern()) << "' ...");

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (254 changed lines)

@@ -179,7 +179,6 @@ namespace storm {
}
}
finished = !improvement;
storm::utility::Stopwatch backupTimer(true);
// back up
result_backup = result;
@@ -231,6 +230,249 @@ namespace storm {
POMDPCheckResult<ValueType>{overApprox, underApprox});
}
//TODO This function reuses a lot of code from the probability computation, refactor to minimize code duplication!
template<typename ValueType, typename RewardModelType>
std::unique_ptr<POMDPCheckResult<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityReward(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::set<uint32_t> targetObservations, bool min, uint64_t gridResolution) {
storm::utility::Stopwatch beliefGridTimer(true);
uint64_t maxIterations = 100;
bool finished = false;
uint64_t iteration = 0;
RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel();
std::vector<storm::pomdp::Belief<ValueType>> beliefList;
std::vector<bool> beliefIsTarget;
uint64_t nextId = 0;
// Initial belief always has ID 0
storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(pomdp, nextId);
++nextId;
beliefList.push_back(initialBelief);
beliefIsTarget.push_back(
targetObservations.find(initialBelief.observation) != targetObservations.end());
std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget,
nextId);
nextId = beliefList.size();
beliefGridTimer.stop();
storm::utility::Stopwatch overApproxTimer(true);
// Belief ID -> Value
std::map<uint64_t, ValueType> result;
std::map<uint64_t, ValueType> result_backup;
// Belief ID -> ActionIndex
std::map<uint64_t, uint64_t> chosenActions;
// Belief ID -> Observation -> Probability
std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
// current ID -> action -> next ID
std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> nextBelieves;
// current ID -> action -> reward
std::map<uint64_t, std::vector<ValueType>> beliefActionRewards;
storm::utility::Stopwatch nextBeliefGeneration(true);
for (size_t i = 0; i < beliefGrid.size(); ++i) {
auto currentBelief = beliefGrid[i];
bool isTarget = beliefIsTarget[currentBelief.id];
result.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
if (!isTarget) {
//TODO put this in extra function
storm::utility::Stopwatch beliefWatch(true);
// All states with the same observation share these parameters, so we simply pick a representative state
uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
std::vector<ValueType> actionRewardsInState(numChoices);
for (uint64_t action = 0; action < numChoices; ++action) {
storm::utility::Stopwatch aopWatch(true);
std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
pomdp, currentBelief, action);
aopWatch.stop();
//STORM_PRINT("AOP " << actionObservationProbabilities.size() << ": " << aopWatch << std::endl)
std::map<uint32_t, uint64_t> actionObservationBelieves;
for (auto iter = actionObservationProbabilities.begin();
iter != actionObservationProbabilities.end(); ++iter) {
uint32_t observation = iter->first;
storm::utility::Stopwatch callWatch(true);
// THIS CALL IS SLOW
// TODO speed this up
actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp,
beliefList,
beliefIsTarget,
targetObservations,
currentBelief,
action,
observation,
nextId);
nextId = beliefList.size();
callWatch.stop();
//STORM_PRINT("Overall: " << callWatch << std::endl)
}
observationProbabilitiesInAction[action] = actionObservationProbabilities;
nextBelievesInAction[action] = actionObservationBelieves;
actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
currentBelief);
}
observationProbabilities.emplace(
std::make_pair(currentBelief.id, observationProbabilitiesInAction));
nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction));
beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState));
beliefWatch.stop();
//STORM_PRINT("Belief " << currentBelief.id << " (" << isTarget << "): " << beliefWatch << std::endl)
}
}
nextBeliefGeneration.stop();
// Use caching to avoid recomputing the subsimplices and lambdas
std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
std::map<uint64_t, std::vector<ValueType>> lambdaCache;
STORM_PRINT("Nr Grid Believes " << beliefGrid.size() << std::endl)
STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl)
// Value Iteration
while (!finished && iteration < maxIterations) {
storm::utility::Stopwatch iterationTimer(true);
STORM_LOG_DEBUG("Iteration " << iteration + 1);
bool improvement = false;
for (size_t i = 0; i < beliefGrid.size(); ++i) {
storm::pomdp::Belief<ValueType> currentBelief = beliefGrid[i];
bool isTarget = beliefIsTarget[currentBelief.id];
if (!isTarget) {
// we can take any state with the observation as they have the same number of choices
uint64_t numChoices = pomdp.getNumberOfChoices(
pomdp.getStatesWithObservation(currentBelief.observation).front());
// Initialize the values for the value iteration
ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
: -storm::utility::infinity<ValueType>();
uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::max(); // sentinel; note that infinity() yields 0 for integer types
ValueType currentValue;
for (uint64_t action = 0; action < numChoices; ++action) {
storm::utility::Stopwatch actionWatch(true);
currentValue = beliefActionRewards[currentBelief.id][action];
storm::utility::Stopwatch loopTimer(true);
for (auto iter = observationProbabilities[currentBelief.id][action].begin();
iter != observationProbabilities[currentBelief.id][action].end(); ++iter) {
storm::utility::Stopwatch subsimplexTime(true);
uint32_t observation = iter->first;
storm::pomdp::Belief<ValueType> nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]];
// compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
// cache the values to not always re-calculate
std::vector<std::vector<ValueType>> subSimplex;
std::vector<ValueType> lambdas;
if (subSimplexCache.count(nextBelief.id) > 0) {
subSimplex = subSimplexCache[nextBelief.id];
lambdas = lambdaCache[nextBelief.id];
} else {
std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
nextBelief.probabilities, gridResolution);
subSimplex = temp.first;
lambdas = temp.second;
subSimplexCache[nextBelief.id] = subSimplex;
lambdaCache[nextBelief.id] = lambdas;
}
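// At this point the successor belief is decomposed into a convex combination of
// grid points (nextBelief ~ sum_j lambdas[j] * subSimplex[j]), so its value can
// be read off from the backed-up grid values in the sum below.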
subsimplexTime.stop();
//STORM_PRINT("--subsimplex: " << subsimplexTime.getTimeInNanoseconds() << "ns" << std::endl)
storm::utility::Stopwatch sumTime(true);
auto sum = storm::utility::zero<ValueType>();
for (size_t j = 0; j < lambdas.size(); ++j) {
if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
sum += lambdas[j] * result_backup.at(
getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
}
}
currentValue += iter->second * sum;
sumTime.stop();
//STORM_PRINT("--value: " << sumTime.getTimeInNanoseconds() << "ns" << std::endl)
}
loopTimer.stop();
//STORM_PRINT("-Loop: " << loopTimer.getTimeInNanoseconds() << "ns" << std::endl)
// Update the selected actions
if ((min && cc.isLess(storm::utility::zero<ValueType>(), chosenValue - currentValue)) ||
(!min &&
cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
chosenValue = currentValue;
chosenActionIndex = action;
}
actionWatch.stop();
//STORM_PRINT("Action: " << actionWatch.getTimeInNanoseconds() << "ns" << std::endl)
}
result[currentBelief.id] = chosenValue;
chosenActions[currentBelief.id] = chosenActionIndex;
// Check if the iteration brought an improvement
if (cc.isLess(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id]) ||
cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
improvement = true;
}
}
}
finished = !improvement;
// back up
result_backup = result;
++iteration;
iterationTimer.stop();
STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl);
}
STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl);
beliefGrid.push_back(initialBelief);
beliefIsTarget.push_back(
targetObservations.find(initialBelief.observation) != targetObservations.end());
std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
initialBelief.probabilities, gridResolution);
std::vector<std::vector<ValueType>> initSubSimplex = temp.first;
std::vector<ValueType> initLambdas = temp.second;
auto overApprox = storm::utility::zero<ValueType>();
for (size_t j = 0; j < initLambdas.size(); ++j) {
if (initLambdas[j] != storm::utility::zero<ValueType>()) {
overApprox += initLambdas[j] *
result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation,
initSubSimplex[j])];
}
}
overApproxTimer.stop();
/*
// Now onto the under-approximation
bool useMdp = false; // true;
storm::utility::Stopwatch underApproxTimer(true);
ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
result, chosenActions, gridResolution, initialBelief.id, min) :
computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result,
chosenActions, gridResolution, initialBelief.id, min);
underApproxTimer.stop();
STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl
<< "Time Overapproximation: " << overApproxTimer
<< std::endl
<< "Time Underapproximation: " << underApproxTimer
<< std::endl);
*/
STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
//STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
return std::make_unique<POMDPCheckResult<ValueType>>(
POMDPCheckResult<ValueType>{overApprox, storm::utility::zero<ValueType>()});
}
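In equations, the backup implemented above is a fixed-grid value iteration for expected rewards (a sketch in the spirit of the Lovejoy-style scheme the comments reference; the notation here is chosen for illustration):

    V_{n+1}(b) = \min_a \big( r(b,a) + \sum_o P(o \mid b,a) \sum_j \lambda_j \, V_n(v_j) \big)

where, for the successor belief b' reached under action a and observation o, the grid points v_j and weights \lambda_j with b' = \sum_j \lambda_j v_j are produced by computeSubSimplexAndLambdas, and \min becomes \max when min is false.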
template<typename ValueType, typename RewardModelType>
ValueType
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -831,6 +1073,16 @@ namespace storm {
}
}
template<typename ValueType, typename RewardModelType>
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
uint64_t action, storm::pomdp::Belief<ValueType> belief) {
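// Note: 'action' here is a global choice (row) index into the transition matrix,
// as produced by pomdp.getChoiceIndex(...) at the call site above.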
auto result = storm::utility::zero<ValueType>();
for (size_t i = 0; i < belief.probabilities.size(); ++i) {
result += belief.probabilities[i] * pomdp.getUniqueRewardModel().getTotalStateActionReward(i, action, pomdp.getTransitionMatrix());
}
return result;
}
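This helper computes the expected immediate reward of an action in a belief, i.e. (in the notation of the sketch above)

    r(b, a) = \sum_s b(s) \, R(s, a)

with the sum running over the states in the belief's support.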
template
class ApproximatePOMDPModelchecker<double>;
