From c6902e0ca73fe084ed68074115e561931c711503 Mon Sep 17 00:00:00 2001
From: Alexander Bork
Date: Mon, 25 Nov 2019 11:08:07 +0100
Subject: [PATCH] Added reward MDP generation for the overapproximation
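
During the belief space expansion, the components of an explicit MDP over the
belief grid are now collected as well: a belief-to-MDP-state map, the
per-action transition distributions and the set of target states. The
resulting MDP is equipped with a "std" reward model derived from the beliefs,
exported in DRN format ("rewardTest") and model checked for
Rmin/Rmax=? [F "target"], so the result can be compared against the value
iteration on the grid. The explicit under-approximation computation is
commented out for now.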
---
 .../ApproximatePOMDPModelchecker.cpp          | 114 +++++++++++++++---
 1 file changed, 96 insertions(+), 18 deletions(-)

diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index c1ce9449e..bbb87684e 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -78,8 +78,7 @@ namespace storm {
             storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(pomdp, nextId);
             ++nextId;
             beliefList.push_back(initialBelief);
-            beliefIsTarget.push_back(
-                    targetObservations.find(initialBelief.observation) != targetObservations.end());
+            beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
 
             // These are the components to build the MDP from the grid
             std::map<uint64_t, uint64_t> beliefStateMap;
@@ -344,8 +343,6 @@ namespace storm {
 
             auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
             auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
-
-
             std::vector<std::string> parameterNames;
             storm::api::exportSparseModelAsDrn(modelPtr, "test", parameterNames);
 
@@ -398,8 +395,16 @@ namespace storm {
             storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(pomdp, nextId);
             ++nextId;
             beliefList.push_back(initialBelief);
-            beliefIsTarget.push_back(
-                    targetObservations.find(initialBelief.observation) != targetObservations.end());
+            beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
+
+            // These are the components to build the MDP from the grid
+            std::map<uint64_t, uint64_t> beliefStateMap;
+            std::vector<std::vector<std::map<uint64_t, ValueType>>> mdpTransitions;
+            std::vector<uint64_t> targetStates;
+            uint64_t mdpStateId = 0;
+
+            beliefStateMap[initialBelief.id] = mdpStateId;
+            ++mdpStateId;
 
             // for the initial belief, add the triangulated initial states
             std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> initTemp = computeSubSimplexAndLambdas(
@@ -410,6 +415,8 @@ namespace storm {
             lambdaCache[0] = initLambdas;
             bool initInserted = false;
 
+            std::vector<std::map<uint64_t, ValueType>> initTransitionsInBelief;
+            std::map<uint64_t, ValueType> initTransitionInActionBelief;
 
             for (size_t j = 0; j < initLambdas.size(); ++j) {
                 if (!cc.isEqual(initLambdas[j], storm::utility::zero<ValueType>())) {
@@ -429,11 +436,22 @@ namespace storm {
                                     targetObservations.find(initialBelief.observation) != targetObservations.end());
                             beliefsToBeExpanded.push_back(nextId);
                             ++nextId;
+
+                            beliefStateMap[nextId] = mdpStateId;
+                            initTransitionInActionBelief[mdpStateId] = initLambdas[j];
+                            ++nextId;
+                            ++mdpStateId;
                         }
                     }
                 }
             }
 
+            // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
+            if (!initTransitionInActionBelief.empty()) {
+                initTransitionsInBelief.push_back(initTransitionInActionBelief);
+                mdpTransitions.push_back(initTransitionsInBelief);
+            }
+
             //beliefsToBeExpanded.push_back(initialBelief.id); TODO I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
             storm::utility::Stopwatch expansionTimer(true);
             while (!beliefsToBeExpanded.empty()) {
@@ -445,17 +463,26 @@ namespace storm {
                 result.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
                 result_backup.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
 
-                if (!isTarget) {
-
+                if (isTarget) {
+                    // MDP stuff
+                    std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
+                    targetStates.push_back(beliefStateMap[currId]);
+                    std::map<uint64_t, ValueType> transitionInActionBelief;
+                    transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one<ValueType>();
+                    transitionsInBelief.push_back(transitionInActionBelief);
+                    mdpTransitions.push_back(transitionsInBelief);
+                } else {
                     uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front();
                     uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
                     std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
                     std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
                     std::vector<ValueType> actionRewardsInState(numChoices);
+                    std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
 
                     for (uint64_t action = 0; action < numChoices; ++action) {
                         std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action);
                         std::map<uint32_t, uint64_t> actionObservationBelieves;
+                        std::map<uint64_t, ValueType> transitionInActionBelief;
                         for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
                             uint32_t observation = iter->first;
                             // THIS CALL IS SLOW
@@ -490,7 +517,13 @@
                                         beliefIsTarget.push_back(
                                                 targetObservations.find(observation) != targetObservations.end());
                                         beliefsToBeExpanded.push_back(nextId);
+
+                                        beliefStateMap[nextId] = mdpStateId;
+                                        transitionInActionBelief[mdpStateId] = iter->second * lambdas[j];
                                         ++nextId;
+                                        ++mdpStateId;
+                                    } else {
+                                        transitionInActionBelief[beliefStateMap[getBeliefIdInVector(beliefGrid, observation, subSimplex[j])]] = iter->second * lambdas[j];
                                     }
                                 }
                             }
@@ -499,12 +532,20 @@
                         nextBelievesInAction[action] = actionObservationBelieves;
                         actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
                                                                             beliefList[currId]);
+                        if (!transitionInActionBelief.empty()) {
+                            transitionsInBelief.push_back(transitionInActionBelief);
+                        }
                     }
-                    observationProbabilities.emplace(
-                            std::make_pair(currId, observationProbabilitiesInAction));
+                    observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction));
                     nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction));
                     beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState));
 
+                    if (transitionsInBelief.empty()) {
+                        std::map<uint64_t, ValueType> transitionInActionBelief;
+                        transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one<ValueType>();
+                        transitionsInBelief.push_back(transitionInActionBelief);
+                    }
+                    mdpTransitions.push_back(transitionsInBelief);
                 }
             }
             expansionTimer.stop();
@@ -512,6 +553,31 @@
             STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl)
             STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl)
 
+            storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size());
+            mdpLabeling.addLabel("init");
+            mdpLabeling.addLabel("target");
+            mdpLabeling.addLabelToState("init", 0);
+            for (auto targetState : targetStates) {
+                mdpLabeling.addLabelToState("target", targetState);
+            }
+
+            storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling);
+            storm::models::sparse::Mdp<ValueType, RewardModelType> overApproxMdp(modelComponents);
+            storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
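+            // For every MDP state, look up the belief it represents and derive its
+            // state-action rewards from the POMDP via getRewardAfterAction, using a
+            // representative state with the same observation to enumerate the choices.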
+            for (auto const &iter : beliefStateMap) {
+                auto currentBelief = beliefList[iter.first];
+                auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
+                for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) {
+                    // Add the reward
+                    mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
+                                                        getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
+                                                                             currentBelief));
+                }
+            }
+            overApproxMdp.addRewardModel("std", mdpRewardModel);
+            overApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
+            overApproxMdp.printModelInformationToStream(std::cout);
+
             storm::utility::Stopwatch overApproxTimer(true);
             // Value Iteration
             while (!finished && iteration < maxIterations) {
@@ -600,13 +666,12 @@ namespace storm {
             auto overApprox = storm::utility::zero<ValueType>();
             for (size_t j = 0; j < initLambdas.size(); ++j) {
                 if (initLambdas[j] != storm::utility::zero<ValueType>()) {
-                    overApprox += initLambdas[j] *
-                                  result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation,
-                                                                    initSubSimplex[j])];
+                    overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])];
                 }
             }
             overApproxTimer.stop();
-
+            ValueType underApprox = storm::utility::zero<ValueType>();
+            /*
             storm::utility::Stopwatch underApproxTimer(true);
             ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
                                                                      result, chosenActions, gridResolution, initialBelief.id, min, true);
             underApproxTimer.stop();
 
             STORM_PRINT("Time Overapproximation: " << overApproxTimer
                                                    << std::endl
                                                    << "Time Underapproximation: " << underApproxTimer
-                                                   << std::endl);
+                                                   << std::endl);*/
             STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
-            STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
+            //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
 
-            return std::make_unique<POMDPCheckResult<ValueType>>(
-                    POMDPCheckResult<ValueType>{overApprox, underApprox});
+            auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
+            auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
+            std::vector<std::string> parameterNames;
+            storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames);
+
+            std::string propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
+            std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
+            std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
+
+            std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, true)));
+            STORM_LOG_ASSERT(res, "Result does not exist.");
+            res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates()));
+            STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap().begin()->second) << std::endl);
+
+            return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{overApprox, underApprox});
         }
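
Note: buildTransitionMatrix(mdpTransitions) is called above but not touched by
this patch; the helper lives elsewhere in ApproximatePOMDPModelchecker. For
orientation, here is a minimal sketch of such a helper, assuming Storm's
storm::storage::SparseMatrixBuilder and one row group per belief-MDP state with
one row per action. Names and details are illustrative assumptions, not the
actual implementation:

    #include <cstdint>
    #include <map>
    #include <vector>

    #include "storm/storage/SparseMatrix.h"

    // Sketch: flatten the collected transitions into a row-grouped sparse matrix.
    // transitions[state][action] maps successor MDP states to probabilities.
    template<typename ValueType>
    storm::storage::SparseMatrix<ValueType> buildTransitionMatrix(
            std::vector<std::vector<std::map<uint64_t, ValueType>>> const &transitions) {
        uint64_t nrRows = 0;
        uint64_t nrEntries = 0;
        for (auto const &actionTransitions : transitions) {
            nrRows += actionTransitions.size();
            for (auto const &distribution : actionTransitions) {
                nrEntries += distribution.size();
            }
        }
        // One column per MDP state, custom row grouping with one group per state.
        storm::storage::SparseMatrixBuilder<ValueType> builder(nrRows, transitions.size(), nrEntries, true, true, transitions.size());
        uint64_t currentRow = 0;
        for (auto const &actionTransitions : transitions) {
            builder.newRowGroup(currentRow);
            for (auto const &distribution : actionTransitions) {
                for (auto const &entry : distribution) {
                    // std::map iterates keys in ascending order, as addNextValue requires.
                    builder.addNextValue(currentRow, entry.first, entry.second);
                }
                ++currentRow;
            }
        }
        return builder.build();
    }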