From f119e3d4c751f99276b301280675c7a6ecaa38a7 Mon Sep 17 00:00:00 2001
From: Alexander Bork
Date: Fri, 18 Oct 2019 11:32:36 +0200
Subject: [PATCH] Added reward over-approximation

---
 src/storm-pomdp-cli/storm-pomdp.cpp              |  55 ++++
 .../ApproximatePOMDPModelchecker.cpp             | 254 +++++++++++++++++-
 2 files changed, 308 insertions(+), 1 deletion(-)

diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp
index e6be182a3..b34ed65b7 100644
--- a/src/storm-pomdp-cli/storm-pomdp.cpp
+++ b/src/storm-pomdp-cli/storm-pomdp.cpp
@@ -183,6 +183,7 @@ int main(const int argc, const char** argv) {
             }
             STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException,
                             "The formula is not supported by the grid approximation");
+            STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!");
 
             storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double> checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double>();
             double overRes = storm::utility::one<double>();
@@ -203,6 +204,60 @@ int main(const int argc, const char** argv) {
             pomdp = selfLoopEliminator.transform();
             STORM_PRINT_AND_LOG(oldChoiceCount - pomdp->getNumberOfChoices() << " choices eliminated through self-loop elimination." << std::endl);
         }
+        if (pomdpSettings.isGridApproximationSet()) {
+            std::string rewardModelName;
+            storm::logic::RewardOperatorFormula const &rewFormula = formula->asRewardOperatorFormula();
+            if (rewFormula.hasRewardModelName()) {
+                rewardModelName = rewFormula.getRewardModelName();
+            }
+            storm::logic::Formula const &subformula1 = rewFormula.getSubformula();
+
+            std::set<uint32_t> targetObservationSet;
+            // TODO refactor, this duplicates the target extraction used for reachability probabilities
+            bool validFormula = false;
+            if (subformula1.isEventuallyFormula()) {
+                storm::logic::EventuallyFormula const &eventuallyFormula = subformula1.asEventuallyFormula();
+                storm::logic::Formula const &subformula2 = eventuallyFormula.getSubformula();
+                if (subformula2.isAtomicLabelFormula()) {
+                    storm::logic::AtomicLabelFormula const &alFormula = subformula2.asAtomicLabelFormula();
+                    validFormula = true;
+                    std::string targetLabel = alFormula.getLabel();
+                    auto labeling = pomdp->getStateLabeling();
+                    for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) {
+                        if (labeling.getStateHasLabel(targetLabel, state)) {
+                            targetObservationSet.insert(pomdp->getObservation(state));
+                        }
+                    }
+                } else if (subformula2.isAtomicExpressionFormula()) {
+                    validFormula = true;
+                    std::stringstream stream;
+                    stream << subformula2.asAtomicExpressionFormula().getExpression();
+                    storm::logic::AtomicLabelFormula formula3 = storm::logic::AtomicLabelFormula(stream.str());
+                    std::string targetLabel = formula3.getLabel();
+                    auto labeling = pomdp->getStateLabeling();
+                    for (size_t state = 0; state < pomdp->getNumberOfStates(); ++state) {
+                        if (labeling.getStateHasLabel(targetLabel, state)) {
+                            targetObservationSet.insert(pomdp->getObservation(state));
+                        }
+                    }
+                }
+            }
+            STORM_LOG_THROW(validFormula, storm::exceptions::InvalidPropertyException,
+                            "The formula is not supported by the grid approximation");
+            STORM_LOG_ASSERT(!targetObservationSet.empty(), "The set of target observations is empty!");
+
+            storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double> checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<double>();
+            double overRes = storm::utility::one<double>();
+            double underRes = storm::utility::zero<double>();
+            std::unique_ptr<storm::pomdp::modelchecker::POMDPCheckResult<double>> result;
+            result = checker.computeReachabilityReward(*pomdp, targetObservationSet,
+                                                       rewFormula.getOptimalityType() == storm::OptimizationDirection::Minimize,
+                                                       pomdpSettings.getGridResolution());
+            overRes = result->OverapproximationValue;
+            underRes = result->UnderapproximationValue;
+        }
     }
     if (pomdpSettings.getMemoryBound() > 1) {
         STORM_PRINT_AND_LOG("Computing the unfolding for memory bound " << pomdpSettings.getMemoryBound() << " and memory pattern '" << storm::storage::toString(pomdpSettings.getMemoryPattern()) << "' ...");
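The block added to storm-pomdp.cpp handles reachability-reward properties such as Rmin=? [F "goal"]: every state carrying the target label (or satisfying the target expression) contributes its observation, and that observation set is handed to the checker as the goal. The short standalone sketch below restates that extraction in isolation; SimplePomdp and collectTargetObservations are hypothetical names, not Storm API.

// Standalone sketch (not Storm code): collect the observations of all states that
// carry the target label, as done for the reward formula above.
#include <cstddef>
#include <cstdint>
#include <set>
#include <string>
#include <vector>

struct SimplePomdp {
    std::vector<std::set<std::string>> stateLabels;  // labels attached to each state
    std::vector<uint32_t> observations;              // observation emitted by each state
};

std::set<uint32_t> collectTargetObservations(SimplePomdp const& pomdp, std::string const& targetLabel) {
    std::set<uint32_t> targetObservations;
    for (std::size_t state = 0; state < pomdp.observations.size(); ++state) {
        if (pomdp.stateLabels[state].count(targetLabel) > 0) {
            targetObservations.insert(pomdp.observations[state]);
        }
    }
    return targetObservations;
}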
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index e0b12229a..43d2a0225 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -179,7 +179,6 @@ namespace storm {
                         }
                     }
                     finished = !improvement;
-                    storm::utility::Stopwatch backupTimer(true);
                     // back up
                     result_backup = result;
 
@@ -231,6 +230,249 @@ namespace storm {
                        POMDPCheckResult<ValueType>{overApprox, underApprox});
            }
 
+            // TODO This function reuses a lot of code from the probability computation, refactor to minimize code duplication!
+            template<typename ValueType, typename RewardModelType>
+            std::unique_ptr<POMDPCheckResult<ValueType>>
+            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeReachabilityReward(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                                                                 std::set<uint32_t> targetObservations, bool min, uint64_t gridResolution) {
+                storm::utility::Stopwatch beliefGridTimer(true);
+                uint64_t maxIterations = 100;
+                bool finished = false;
+                uint64_t iteration = 0;
+
+                RewardModelType pomdpRewardModel = pomdp.getUniqueRewardModel();
+
+                std::vector<storm::pomdp::Belief<ValueType>> beliefList;
+                std::vector<bool> beliefIsTarget;
+                uint64_t nextId = 0;
+                // Initial belief always has ID 0
+                storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(pomdp, nextId);
+                ++nextId;
+                beliefList.push_back(initialBelief);
+                beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
+
+                std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
+                constructBeliefGrid(pomdp, targetObservations, gridResolution, beliefList, beliefGrid, beliefIsTarget, nextId);
+                nextId = beliefList.size();
+                beliefGridTimer.stop();
+
+                storm::utility::Stopwatch overApproxTimer(true);
+                // Belief ID -> Value
+                std::map<uint64_t, ValueType> result;
+                std::map<uint64_t, ValueType> result_backup;
+                // Belief ID -> ActionIndex
+                std::map<uint64_t, uint64_t> chosenActions;
+
+                // Belief ID -> Action -> (Observation -> Probability)
+                std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
+                // current ID -> Action -> (Observation -> next ID)
+                std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> nextBelieves;
+                // current ID -> Action -> reward
+                std::map<uint64_t, std::vector<ValueType>> beliefActionRewards;
+
+                storm::utility::Stopwatch nextBeliefGeneration(true);
+                for (size_t i = 0; i < beliefGrid.size(); ++i) {
+                    auto currentBelief = beliefGrid[i];
+                    bool isTarget = beliefIsTarget[currentBelief.id];
+                    result.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
+                    result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
+                    if (!isTarget) {
+                        // TODO put this in an extra function
+                        storm::utility::Stopwatch beliefWatch(true);
+                        // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative
+                        uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
+                        uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
+                        std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
+                        std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
+                        std::vector<ValueType> actionRewardsInState(numChoices);
+
+                        for (uint64_t action = 0; action < numChoices; ++action) {
+                            storm::utility::Stopwatch aopWatch(true);
+                            std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, currentBelief, action);
+                            aopWatch.stop();
+                            //STORM_PRINT("AOP " << actionObservationProbabilities.size() << ": " << aopWatch << std::endl)
+                            std::map<uint32_t, uint64_t> actionObservationBelieves;
+                            for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
+                                uint32_t observation = iter->first;
+                                storm::utility::Stopwatch callWatch(true);
+                                // THIS CALL IS SLOW
+                                // TODO speed this up
+                                actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations,
+                                                                                                            currentBelief, action, observation, nextId);
+                                nextId = beliefList.size();
+                                callWatch.stop();
+                                //STORM_PRINT("Overall: " << callWatch << std::endl)
+                            }
+                            observationProbabilitiesInAction[action] = actionObservationProbabilities;
+                            nextBelievesInAction[action] = actionObservationBelieves;
+                            actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
+                                                                                currentBelief);
+                        }
+                        observationProbabilities.emplace(std::make_pair(currentBelief.id, observationProbabilitiesInAction));
+                        nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction));
+                        beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState));
+                        beliefWatch.stop();
+                        //STORM_PRINT("Belief " << currentBelief.id << " (" << isTarget << "): " << beliefWatch << std::endl)
+                    }
+                }
+                nextBeliefGeneration.stop();
+
+                // Use caching to avoid recomputing the subsimplices and lambdas
+                std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
+                std::map<uint64_t, std::vector<ValueType>> lambdaCache;
+
+                STORM_PRINT("Number of grid beliefs: " << beliefGrid.size() << std::endl)
+                STORM_PRINT("Time to generate next beliefs: " << nextBeliefGeneration << std::endl)
+                // Value iteration
+                while (!finished && iteration < maxIterations) {
+                    storm::utility::Stopwatch iterationTimer(true);
+                    STORM_LOG_DEBUG("Iteration " << iteration + 1);
+                    bool improvement = false;
+                    for (size_t i = 0; i < beliefGrid.size(); ++i) {
+                        storm::pomdp::Belief<ValueType> currentBelief = beliefGrid[i];
+                        bool isTarget = beliefIsTarget[currentBelief.id];
+                        if (!isTarget) {
+                            // we can take any state with the observation as they have the same number of choices
+                            uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front());
+                            // Initialize the values for the value iteration
+                            ValueType chosenValue = min ? storm::utility::infinity<ValueType>() : -storm::utility::infinity<ValueType>();
+                            uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::max();
+                            ValueType currentValue;
+
+                            for (uint64_t action = 0; action < numChoices; ++action) {
+                                storm::utility::Stopwatch actionWatch(true);
+
+                                currentValue = beliefActionRewards[currentBelief.id][action];
+                                storm::utility::Stopwatch loopTimer(true);
+                                for (auto iter = observationProbabilities[currentBelief.id][action].begin();
+                                     iter != observationProbabilities[currentBelief.id][action].end(); ++iter) {
+                                    storm::utility::Stopwatch subsimplexTime(true);
+                                    uint32_t observation = iter->first;
+                                    storm::pomdp::Belief<ValueType> nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]];
+                                    // compute the subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
+                                    // cache the values to avoid recomputing them
+                                    std::vector<std::vector<ValueType>> subSimplex;
+                                    std::vector<ValueType> lambdas;
+                                    if (subSimplexCache.count(nextBelief.id) > 0) {
+                                        subSimplex = subSimplexCache[nextBelief.id];
+                                        lambdas = lambdaCache[nextBelief.id];
+                                    } else {
+                                        std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution);
+                                        subSimplex = temp.first;
+                                        lambdas = temp.second;
+                                        subSimplexCache[nextBelief.id] = subSimplex;
+                                        lambdaCache[nextBelief.id] = lambdas;
+                                    }
+                                    subsimplexTime.stop();
+                                    //STORM_PRINT("--subsimplex: " << subsimplexTime.getTimeInNanoseconds() << "ns" << std::endl)
+                                    storm::utility::Stopwatch sumTime(true);
+                                    auto sum = storm::utility::zero<ValueType>();
+                                    for (size_t j = 0; j < lambdas.size(); ++j) {
+                                        if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
+                                            sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
+                                        }
+                                    }
+
+                                    currentValue += iter->second * sum;
+                                    sumTime.stop();
+                                    //STORM_PRINT("--value: " << sumTime.getTimeInNanoseconds() << "ns" << std::endl)
+                                }
+                                loopTimer.stop();
+                                //STORM_PRINT("-Loop: " << loopTimer.getTimeInNanoseconds() << "ns" << std::endl)
+                                // Update the chosen action if the current one is at least as good
+                                if ((min && cc.isLess(storm::utility::zero<ValueType>(), chosenValue - currentValue)) ||
+                                    (!min && cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
+                                    cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
+                                    chosenValue = currentValue;
+                                    chosenActionIndex = action;
+                                }
+                                actionWatch.stop();
+                                //STORM_PRINT("Action: " << actionWatch.getTimeInNanoseconds() << "ns" << std::endl)
+                            }
+
+                            result[currentBelief.id] = chosenValue;
+                            chosenActions[currentBelief.id] = chosenActionIndex;
+                            // Check if the iteration brought an improvement
+                            if (cc.isLess(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id]) ||
+                                cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
+                                improvement = true;
+                            }
+                        }
+                    }
+                    finished = !improvement;
+                    // back up
+                    result_backup = result;
+
+                    ++iteration;
+                    iterationTimer.stop();
+                    STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl);
+                }
+
+                STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl);
+
+                beliefGrid.push_back(initialBelief);
+                beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
+
+                std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution);
+                std::vector<std::vector<ValueType>> initSubSimplex = temp.first;
+                std::vector<ValueType> initLambdas = temp.second;
+
+                auto overApprox = storm::utility::zero<ValueType>();
+                for (size_t j = 0; j < initLambdas.size(); ++j) {
+                    if (initLambdas[j] != storm::utility::zero<ValueType>()) {
+                        overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])];
+                    }
+                }
+                overApproxTimer.stop();
+
+                /*
+                // Now onto the under-approximation
+                bool useMdp = false; //true;
+                storm::utility::Stopwatch underApproxTimer(true);
+                ValueType underApprox = useMdp ? computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
+                                                                                  result, chosenActions, gridResolution, initialBelief.id, min)
+                                               : computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
+                                                                                   result, chosenActions, gridResolution, initialBelief.id, min);
+                underApproxTimer.stop();
+
+                STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl
+                            << "Time Overapproximation: " << overApproxTimer << std::endl
+                            << "Time Underapproximation: " << underApproxTimer << std::endl);
+                */
+                STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
+                //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
+
+                // The under-approximation is not computed yet, so it is reported as zero.
+                return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{overApprox, storm::utility::zero<ValueType>()});
+            }
+
         template<typename ValueType, typename RewardModelType>
         ValueType
         ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximationWithDTMC(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -831,6 +1073,16 @@ namespace storm {
             }
         }
 
+        template<typename ValueType, typename RewardModelType>
+        ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
+                                                                                                 uint64_t action, storm::pomdp::Belief<ValueType> belief) {
+            // Weight the reward of each state in the belief's support by its probability
+            auto result = storm::utility::zero<ValueType>();
+            for (size_t i = 0; i < belief.probabilities.size(); ++i) {
+                result += belief.probabilities[i] * pomdp.getUniqueRewardModel().getTotalStateActionReward(i, action, pomdp.getTransitionMatrix());
+            }
+            return result;
+        }
+
         template class ApproximatePOMDPModelchecker<double>;
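For orientation, the backup performed inside the value-iteration loop of computeReachabilityReward combines the expected immediate reward of a belief with the interpolated values of its successor beliefs: V(b) = opt_a [ r(b,a) + Σ_z P(z|b,a) · Σ_j λ_j · V(v_j) ], where the v_j are the grid beliefs spanning the sub-simplex of the successor belief and the λ_j are the corresponding convex weights (Lovejoy's grid approximation). The self-contained sketch below restates one such backup with plain standard-library types; it is illustrative only, and names such as Successor and backupBelief do not exist in Storm.

// Standalone sketch (not Storm code) of one value-iteration backup for expected rewards:
//   V(b) = opt_a [ r(b,a) + sum_z P(z | b,a) * sum_j lambda_j * V(v_j) ]
// where the successor belief after (a,z) is approximated by grid beliefs v_j with
// convex weights lambda_j (Lovejoy-style triangulation).
#include <cstddef>
#include <cstdint>
#include <limits>
#include <map>
#include <utility>
#include <vector>

struct Successor {
    double probability;             // P(z | b, a)
    std::vector<uint64_t> gridIds;  // grid beliefs v_j spanning the sub-simplex of the successor belief
    std::vector<double> lambdas;    // convex weights lambda_j (summing to 1)
};

// Returns the backed-up value of one belief and the action achieving it.
std::pair<double, uint64_t> backupBelief(std::vector<double> const& actionRewards,              // r(b, a) per action
                                         std::vector<std::vector<Successor>> const& successors, // successor data per action
                                         std::map<uint64_t, double> const& gridValues,          // V(v_j) from the previous iteration
                                         bool minimize) {
    double chosenValue = minimize ? std::numeric_limits<double>::infinity() : -std::numeric_limits<double>::infinity();
    uint64_t chosenAction = 0;
    for (uint64_t action = 0; action < actionRewards.size(); ++action) {
        double value = actionRewards[action];  // expected immediate reward collected in the belief
        for (auto const& succ : successors[action]) {
            double interpolated = 0.0;
            for (std::size_t j = 0; j < succ.lambdas.size(); ++j) {
                interpolated += succ.lambdas[j] * gridValues.at(succ.gridIds[j]);
            }
            value += succ.probability * interpolated;  // weight by the observation probability
        }
        if ((minimize && value < chosenValue) || (!minimize && value > chosenValue)) {
            chosenValue = value;
            chosenAction = action;
        }
    }
    return {chosenValue, chosenAction};
}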