From 4b8664c521bb0d21ffcc9691cad58a28c48f9468 Mon Sep 17 00:00:00 2001
From: Alexander Bork
Date: Tue, 22 Oct 2019 10:30:50 +0200
Subject: [PATCH] Added reward under-approximation

---
 src/storm-pomdp-cli/storm-pomdp.cpp                |   4 +
 .../ApproximatePOMDPModelchecker.cpp               | 110 ++++++++++--------
 .../ApproximatePOMDPModelchecker.h                 |   4 +-
 3 files changed, 66 insertions(+), 52 deletions(-)

diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp
index b34ed65b7..dd1776149 100644
--- a/src/storm-pomdp-cli/storm-pomdp.cpp
+++ b/src/storm-pomdp-cli/storm-pomdp.cpp
@@ -24,6 +24,8 @@
 #include "storm/settings/modules/BuildSettings.h"
 #include "storm/settings/modules/JitBuilderSettings.h"
 #include "storm/settings/modules/TopologicalEquationSolverSettings.h"
+#include "storm/settings/modules/ModelCheckerSettings.h"
+#include "storm/settings/modules/MultiplierSettings.h"
 #include "storm/settings/modules/MultiObjectiveSettings.h"
 
 #include "storm-pomdp-cli/settings/modules/POMDPSettings.h"
@@ -67,6 +69,8 @@ void initializeSettings() {
     storm::settings::addModule<storm::settings::modules::BuildSettings>();
     storm::settings::addModule<storm::settings::modules::JitBuilderSettings>();
     storm::settings::addModule<storm::settings::modules::TopologicalEquationSolverSettings>();
+    storm::settings::addModule<storm::settings::modules::ModelCheckerSettings>();
+    storm::settings::addModule<storm::settings::modules::MultiplierSettings>();
     storm::settings::addModule<storm::settings::modules::MultiObjectiveSettings>();
 
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 43d2a0225..ad2fd4973 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -71,9 +71,6 @@ namespace storm {
                     result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
 
                     //TODO put this in extra function
-                    std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction;
-                    std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction;
-
                     uint64_t numChoices = pomdp.getNumberOfChoices(
                             pomdp.getStatesWithObservation(currentBelief.observation).front());
                     std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
@@ -86,6 +83,8 @@
                         for (auto iter = actionObservationProbabilities.begin();
                              iter != actionObservationProbabilities.end(); ++iter) {
                             uint32_t observation = iter->first;
+                            // THIS CALL IS SLOW
+                            // TODO speed this up
                             actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp,
                                                                                                         beliefList,
                                                                                                         beliefIsTarget,
@@ -110,7 +109,6 @@
 
                 std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
                 std::map<uint64_t, std::vector<ValueType>> lambdaCache;
-                STORM_PRINT("Nr Believes " << beliefList.size() << std::endl)
                 STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl)
                 // Value Iteration
                 while (!finished && iteration < maxIterations) {
@@ -131,7 +129,7 @@
                             ValueType currentValue;
 
                             for (uint64_t action = 0; action < numChoices; ++action) {
-                                currentValue = storm::utility::zero<ValueType>(); // simply change this for rewards?
+                                currentValue = storm::utility::zero<ValueType>();
                                 for (auto iter = observationProbabilities[currentBelief.id][action].begin();
                                      iter != observationProbabilities[currentBelief.id][action].end(); ++iter) {
                                     uint32_t observation = iter->first;
@@ -172,8 +170,7 @@
                             result[currentBelief.id] = chosenValue;
                             chosenActions[currentBelief.id] = chosenActionIndex;
                             // Check if the iteration brought an improvement
-                            if ((min && cc.isLess(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id])) ||
-                                (!min && cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id]))) {
+                            if (cc.isLess(storm::utility::zero<ValueType>(), result[currentBelief.id] - result_backup[currentBelief.id])) {
                                 improvement = true;
                             }
@@ -209,12 +206,12 @@
                 overApproxTimer.stop();
 
                 // Now onto the under-approximation
-                bool useMdp = /*false;*/true;
+                bool useMdp = true;
                 storm::utility::Stopwatch underApproxTimer(true);
                 ValueType underApprox = useMdp ?
                                         computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
-                                                                         result, chosenActions, gridResolution, initialBelief.id, min) :
+                                                                         result, chosenActions, gridResolution, initialBelief.id, min, false) :
                                         computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result,
-                                                                          chosenActions, gridResolution, initialBelief.id, min);
+                                                                          chosenActions, gridResolution, initialBelief.id, min, false);
                 underApproxTimer.stop();
 
                 STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl
@@ -281,7 +278,6 @@ namespace storm {
                     result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
 
                     if (!isTarget) {
                         //TODO put this in extra function
-                        storm::utility::Stopwatch beliefWatch(true);
                         // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative
                         uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
                         uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
@@ -291,16 +287,12 @@
                         std::vector<ValueType> actionRewardsInState(numChoices);
 
                         for (uint64_t action = 0; action < numChoices; ++action) {
-                            storm::utility::Stopwatch aopWatch(true);
                             std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(
                                     pomdp, currentBelief, action);
-                            aopWatch.stop();
-                            //STORM_PRINT("AOP " << actionObservationProbabilities.size() << ": " << aopWatch << std::endl)
                             std::map<uint32_t, uint64_t> actionObservationBelieves;
                             for (auto iter = actionObservationProbabilities.begin();
                                  iter != actionObservationProbabilities.end(); ++iter) {
                                 uint32_t observation = iter->first;
-                                storm::utility::Stopwatch callWatch(true);
                                 // THIS CALL IS SLOW
                                 // TODO speed this up
                                 actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp,
@@ -312,8 +304,6 @@
                                                                                                             observation, nextId);
                                 nextId = beliefList.size();
-                                callWatch.stop();
-                                //STORM_PRINT("Overall: " << callWatch << std::endl)
                             }
                             observationProbabilitiesInAction[action] = actionObservationProbabilities;
                             nextBelievesInAction[action] = actionObservationBelieves;
@@ -325,8 +315,6 @@
                                 std::make_pair(currentBelief.id, observationProbabilitiesInAction));
                         nextBelieves.emplace(std::make_pair(currentBelief.id, nextBelievesInAction));
                         beliefActionRewards.emplace(std::make_pair(currentBelief.id, actionRewardsInState));
-                        beliefWatch.stop();
-                        //STORM_PRINT("Belief " << currentBelief.id << " (" << isTarget << "): " << beliefWatch << std::endl)
                     }
                 }
 
@@ -336,7 +324,6 @@
 
                 std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
                 std::map<uint64_t, std::vector<ValueType>> lambdaCache;
-                STORM_PRINT("Nr Grid Believes " << beliefGrid.size() << std::endl)
                 STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl)
                 // Value Iteration
                 while (!finished && iteration < maxIterations) {
@@ -357,13 +344,9 @@
                             ValueType currentValue;
 
                             for (uint64_t action = 0; action < numChoices; ++action) {
-                                storm::utility::Stopwatch actionWatch(true);
-
                                 currentValue = beliefActionRewards[currentBelief.id][action];
-                                storm::utility::Stopwatch loopTimer(true);
                                 for (auto iter = observationProbabilities[currentBelief.id][action].begin();
                                      iter != observationProbabilities[currentBelief.id][action].end(); ++iter) {
-                                    storm::utility::Stopwatch subsimplexTime(true);
                                     uint32_t observation = iter->first;
                                     storm::pomdp::Belief<ValueType> nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]];
                                     // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
@@ -381,9 +364,6 @@
                                         subSimplexCache[nextBelief.id] = subSimplex;
                                         lambdaCache[nextBelief.id] = lambdas;
                                     }
-                                    subsimplexTime.stop();
-                                    //STORM_PRINT("--subsimplex: " << subsimplexTime.getTimeInNanoseconds() << "ns" << std::endl)
-                                    storm::utility::Stopwatch sumTime(true);
                                     auto sum = storm::utility::zero<ValueType>();
                                     for (size_t j = 0; j < lambdas.size(); ++j) {
                                         if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
@@ -393,11 +373,7 @@
                                     currentValue += iter->second * sum;
-                                    sumTime.stop();
-                                    //STORM_PRINT("--value: " << sumTime.getTimeInNanoseconds() << "ns" << std::endl)
                                 }
-                                loopTimer.stop();
-                                //STORM_PRINT("-Loop: " << loopTimer.getTimeInNanoseconds() << "ns" << std::endl)
 
                                 // Update the selected actions
                                 if ((min && cc.isLess(storm::utility::zero<ValueType>(), chosenValue - currentValue)) ||
                                     (!min &&
@@ -407,8 +383,6 @@
                                     chosenValue = currentValue;
                                     chosenActionIndex = action;
                                 }
-                                actionWatch.stop();
-                                //STORM_PRINT("Action: " << actionWatch.getTimeInNanoseconds() << "ns" << std::endl)
                             }
 
                             result[currentBelief.id] = chosenValue;
@@ -450,14 +424,14 @@
                     }
                 }
                 overApproxTimer.stop();
-                /*
+
                 // Now onto the under-approximation
-                bool useMdp = false;true;
+                bool useMdp = true;
                 storm::utility::Stopwatch underApproxTimer(true);
                 ValueType underApprox = useMdp ?
                                         computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
-                                                                         result, chosenActions, gridResolution, initialBelief.id, min) :
-                                        computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves, result,
-                                                                          chosenActions, gridResolution, initialBelief.id, min);
+                                                                         result, chosenActions, gridResolution, initialBelief.id, min, true) :
+                                        computeUnderapproximationWithDTMC(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
+                                                                          result, chosenActions, gridResolution, initialBelief.id, min, true);
                 underApproxTimer.stop();
 
                 STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl
@@ -465,12 +439,11 @@
                                     << std::endl
                                     << "Time Underapproximation: " << underApproxTimer
                                     << std::endl);
-*/
                 STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
-                //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
+                STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
 
                 return std::make_unique<POMDPCheckResult<ValueType>>(
-                        POMDPCheckResult<ValueType>{overApprox, storm::utility::zero<ValueType>()});
+                        POMDPCheckResult<ValueType>{overApprox, underApprox});
             }
 
             template<typename ValueType, typename RewardModelType>
@@ -483,7 +456,8 @@
                                                               std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                                                               std::map<uint64_t, ValueType> &result,
                                                               std::map<uint64_t, uint64_t> chosenActions,
-                                                              uint64_t gridResolution, uint64_t initialBeliefId, bool min) {
+                                                              uint64_t gridResolution, uint64_t initialBeliefId, bool min,
+                                                              bool computeReward) {
                 std::set<uint64_t> visitedBelieves;
                 std::deque<uint64_t> believesToBeExpanded;
                 std::map<uint64_t, uint64_t> beliefStateMap;
@@ -539,14 +513,30 @@
                 for (auto targetState : targetStates) {
                     labeling.addLabelToState("target", targetState);
                 }
-                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(
-                        buildTransitionMatrix(transitions), labeling);
+
+                storm::models::sparse::StandardRewardModel<ValueType> rewardModel(std::vector<ValueType>(beliefStateMap.size()));
+                for (auto const &iter : beliefStateMap) {
+                    auto currentBelief = beliefList[iter.first];
+                    // Add the reward collected by taking the chosen Action in the belief
+                    rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex(
+                            storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first])),
+                                                                                 currentBelief));
+                }
+
+                std::unordered_map<std::string, RewardModelType> rewardModels = {{"std", rewardModel}};
+
+                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(buildTransitionMatrix(transitions), labeling, rewardModels);
 
                 storm::models::sparse::Dtmc<ValueType, RewardModelType> underApproxDtmc(modelComponents);
                 auto model = std::make_shared<storm::models::sparse::Dtmc<ValueType, RewardModelType>>(underApproxDtmc);
                 model->printModelInformationToStream(std::cout);
 
-                std::string propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
+                std::string propertyString;
+                if (computeReward) {
+                    propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
+                } else {
+                    propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
+                }
                 std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
                 std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(
                         propertyVector).front();
@@ -569,7 +559,8 @@
                                                              std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                                                              std::map<uint64_t, ValueType> &result,
                                                              std::map<uint64_t, uint64_t> chosenActions,
-                                                             uint64_t gridResolution, uint64_t initialBeliefId, bool min) {
+                                                             uint64_t gridResolution, uint64_t initialBeliefId, bool min,
+                                                             bool computeRewards) {
                 std::set<uint64_t> visitedBelieves;
                 std::deque<uint64_t> believesToBeExpanded;
                 std::map<uint64_t, uint64_t> beliefStateMap;
@@ -598,7 +589,6 @@
                             pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front());
                     if (chosenActions.find(currentBeliefId) == chosenActions.end()) {
                         // If the current Belief is not part of the grid, the next states have not been computed yet.
-                        // For now, this is a very dirty workaround because I am currently to lazy to refactor everything to be able to do this without the extractBestAction method
                         std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction;
                         std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction;
                         for (uint64_t action = 0; action < numChoices; ++action) {
@@ -638,7 +628,6 @@
                                 believesToBeExpanded.push_back(nextBeliefId);
                             }
                             transitionsInStateWithAction[beliefStateMap[nextBeliefId]] = iter->second;
-                            //STORM_PRINT("Transition with action " << action << " from state " << beliefStateMap[currentBeliefId] << " to state " << beliefStateMap[nextBeliefId] << " with prob " << iter->second << std::endl)
                         }
                         actionTransitionStorage.push_back(transitionsInStateWithAction);
                     }
@@ -659,10 +648,32 @@
                         buildTransitionMatrix(transitions), labeling);
 
                 storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(modelComponents);
+
+                if (computeRewards) {
+                    storm::models::sparse::StandardRewardModel<ValueType> rewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
+                    for (auto const &iter : beliefStateMap) {
+                        auto currentBelief = beliefList[iter.first];
+                        auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
+                        for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) {
+                            // Add the reward
+                            rewardModel.setStateActionReward(underApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
+                                                             getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
+                                                                                  currentBelief));
+                        }
+                    }
+                    underApproxMdp.addRewardModel("std", rewardModel);
+                    underApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
+                }
+
                 auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underApproxMdp);
                 model->printModelInformationToStream(std::cout);
 
-                std::string propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
+                std::string propertyString;
+                if (computeRewards) {
+                    propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
+                } else {
+                    propertyString = min ? "Pmin=? [F \"target\"]" : "Pmax=? [F \"target\"]";
+                }
                 std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
                 std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(
                         propertyVector).front();
@@ -696,7 +707,6 @@
                     smb.newRowGroup(currentRow);
                     for (auto const &map : actionTransitions) {
                         for (auto const &transition : map) {
-                            //STORM_PRINT("  Add transition from state " << currentRowGroup << " to state " << transition.first << " with prob " << transition.second << std::endl)
                             smb.addNextValue(currentRow, transition.first, transition.second);
                         }
                         ++currentRow;
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index b8ad0fa66..f3947dcdc 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -77,7 +77,7 @@ namespace storm {
                                                        std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                                                        std::map<uint64_t, ValueType> &result,
                                                        std::map<uint64_t, uint64_t> chosenActions,
-                                                       uint64_t gridResolution, uint64_t initialBeliefId, bool min);
+                                                       uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward);
 
             ValueType computeUnderapproximationWithMDP(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                        std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
@@ -87,7 +87,7 @@ namespace storm {
                                                        std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                                                        std::map<uint64_t, ValueType> &result,
                                                        std::map<uint64_t, uint64_t> chosenActions,
                                                        uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeRewards);
 
             /**
              *
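
Notes on the approach (annotation, not part of the commit): the under-approximation models built by this patch now carry a reward structure named "std". In the DTMC case each belief state gets a plain state reward for its single chosen action, in the MDP case every choice gets a state-action reward, and the model-checking query switches from Pmin/Pmax to Rmin/Rmax over the same F "target" event. The sketch below illustrates the belief-weighted immediate reward that getRewardAfterAction presumably computes; its implementation is not part of this diff, and Belief, reward, and expectedActionReward here are hypothetical stand-ins, not Storm API.

    #include <cstdint>
    #include <functional>
    #include <map>

    // Hypothetical stand-in for storm::pomdp::Belief<ValueType>: a probability
    // distribution over POMDP states, i.e. state id -> probability.
    using Belief = std::map<uint64_t, double>;

    // Expected immediate reward of playing `action` in belief `b`: the POMDP's
    // state-action reward in each state, weighted by the probability the belief
    // assigns to that state. In the patch this role is presumably filled by
    // getRewardAfterAction(pomdp, choiceIndex, belief), where the choice index
    // is resolved via a representative state sharing the belief's observation,
    // since all states with one observation have the same action structure.
    double expectedActionReward(Belief const& b, uint64_t action,
                                std::function<double(uint64_t, uint64_t)> const& reward) {
        double expected = 0.0;
        for (auto const& [state, probability] : b) {
            expected += probability * reward(state, action);
        }
        return expected;
    }

This also explains why the two code paths differ: the DTMC construction has already fixed one action per belief, so a per-state reward (setStateReward) suffices, while the MDP keeps all choices open and therefore needs one reward entry per row of the transition matrix (setStateActionReward).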