From 6fee61feb1628846d93617194246ba07604432fd Mon Sep 17 00:00:00 2001 From: Tim Quatmann Date: Fri, 27 Mar 2020 15:11:52 +0100 Subject: [PATCH] POMDP: Started to split belief logic from exploration logic. --- .../ApproximatePOMDPModelchecker.cpp | 343 ++++++++---------- .../ApproximatePOMDPModelchecker.h | 3 +- src/storm-pomdp/storage/BeliefGrid.h | 296 +++++++++++++++ 3 files changed, 453 insertions(+), 189 deletions(-) create mode 100644 src/storm-pomdp/storage/BeliefGrid.h diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 1ff8d07ca..16704e4a4 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -1,5 +1,7 @@ #include "ApproximatePOMDPModelchecker.h" +#include + #include #include "storm-pomdp/analysis/FormulaInformation.h" @@ -21,6 +23,8 @@ #include "storm/api/export.h" #include "storm-parsers/api/storm-parsers.h" +#include "storm-pomdp/storage/BeliefGrid.h" + #include "storm/utility/macros.h" #include "storm/utility/SignalHandler.h" #include "storm/exceptions/NotSupportedException.h" @@ -322,6 +326,16 @@ namespace storm { } + + template + ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) { + ValueType result = storm::utility::zero(); + for (auto const& entry : belief) { + result += storm::utility::convertNumber(entry.second) * storm::utility::convertNumber(summands.at(entry.first)); + } + return result; + } + template std::shared_ptr> ApproximatePOMDPModelchecker::computeFirstRefinementStep(std::set const &targetObservations, bool min, @@ -338,15 +352,14 @@ namespace storm { underMap = underApproximationMap.value(); } - std::vector> beliefList; - std::vector beliefIsTarget; - std::vector> beliefGrid; + storm::storage::BeliefGrid, ValueType> newBeliefGrid(pomdp, options.numericPrecision); //Use caching to avoid multiple computation of the subsimplices and lambdas std::map>> subSimplexCache; std::map> lambdaCache; bsmap_type beliefStateMap; std::deque beliefsToBeExpanded; + storm::storage::BitVector expandedBeliefs; // current ID -> action -> reward std::map> beliefActionRewards; @@ -355,244 +368,185 @@ namespace storm { // Initial belief always has belief ID 0 storm::pomdp::Belief initialBelief = getInitialBelief(nextId); ++nextId; - beliefList.push_back(initialBelief); - beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); // These are the components to build the MDP from the grid // Reserve states 0 and 1 as always sink/goal states - std::vector>> mdpTransitions = {{{{0, storm::utility::one()}}}, - {{{1, storm::utility::one()}}}}; + storm::storage::SparseMatrixBuilder mdpTransitionsBuilder(0, 0, 0, true, true); + uint64_t extraBottomState = 0; + uint64_t extraTargetState = computeRewards ? 0 : 1; + uint64_t nextMdpStateId = extraTargetState + 1; + uint64_t mdpMatrixRow = 0; + for (uint64_t state = 0; state < nextMdpStateId; ++state) { + mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one()); + ++mdpMatrixRow; + } // Hint vector for the MDP modelchecker (initialize with constant sink/goal values) - std::vector hintVector = {storm::utility::zero(), storm::utility::one()}; - std::vector targetStates = {1}; - uint64_t mdpStateId = 2; - - beliefStateMap.insert(bsmap_type::value_type(initialBelief.id, mdpStateId)); - ++mdpStateId; + std::vector hintVector(nextMdpStateId, storm::utility::zero()); + if (!computeRewards) { + hintVector[extraTargetState] = storm::utility::one(); + } + std::vector targetStates = {extraTargetState}; // Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace std::map weightedSumOverMap; std::map weightedSumUnderMap; // for the initial belief, add the triangulated initial states - auto initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, observationResolutionVector[initialBelief.observation], pomdp.getNumberOfStates()); - auto initSubSimplex = initTemp.first; - auto initLambdas = initTemp.second; + auto triangulation = newBeliefGrid.triangulateBelief(initialBelief.probabilities, observationResolutionVector[initialBelief.observation]); if (options.cacheSubsimplices) { - subSimplexCache[0] = initSubSimplex; - lambdaCache[0] = initLambdas; + //subSimplexCache[0] = initSubSimplex; + //lambdaCache[0] = initLambdas; } std::vector> initTransitionsInBelief; - std::map initTransitionInActionBelief; - bool initInserted = false; - for (size_t j = 0; j < initLambdas.size(); ++j) { - if (!cc.isEqual(initLambdas[j], storm::utility::zero())) { - uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]); - if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) { - if (searchResult == 0) { - // the initial belief is on the grid itself - if (boundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) { - tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber(overMap[i]); - tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber(underMap[i]); - } - weightedSumOverMap[initialBelief.id] = tempWeightedSumOver; - weightedSumUnderMap[initialBelief.id] = tempWeightedSumUnder; - } - initInserted = true; - beliefGrid.push_back(initialBelief); - beliefsToBeExpanded.push_back(0); - hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() - : storm::utility::zero()); - } else { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - if (boundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) { - tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber(overMap[i]); - tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber(underMap[i]); - } - - weightedSumOverMap[nextId] = tempWeightedSumOver; - weightedSumUnderMap[nextId] = tempWeightedSumUnder; - } - - storm::pomdp::Belief gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]}; - beliefList.push_back(gridBelief); - beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end()); - beliefsToBeExpanded.push_back(nextId); - ++nextId; - - hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() + uint64_t initialMdpState = nextMdpStateId; + ++nextMdpStateId; + if (triangulation.size() == 1) { + // The initial belief is on the grid itself + auto initBeliefId = triangulation.gridPoints.front(); + if (boundMapsSet) { + auto const& gridPoint = newBeliefGrid.getGridPoint(initBeliefId); + weightedSumOverMap[initBeliefId] = getWeightedSum(gridPoint, overMap); + weightedSumUnderMap[initBeliefId] = getWeightedSum(gridPoint, underMap); + } + beliefsToBeExpanded.push_back(initBeliefId); + beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState)); + hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() : storm::utility::zero()); - - beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId)); - initTransitionInActionBelief[mdpStateId] = initLambdas[j]; - ++nextId; - ++mdpStateId; - } + } else { + // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs + mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); + for (uint64_t i = 0; i < triangulation.size(); ++i) { + beliefsToBeExpanded.push_back(triangulation.gridPoints[i]); + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]); + beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId)); + ++nextMdpStateId; + if (boundMapsSet) { + auto const& gridPoint = newBeliefGrid.getGridPoint(triangulation.gridPoints[i]); + weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, overMap); + weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum(gridPoint, underMap); } + hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one() + : storm::utility::zero()); } + //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting + ++mdpMatrixRow; } - - // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs - if (!initTransitionInActionBelief.empty()) { - initTransitionsInBelief.push_back(initTransitionInActionBelief); - mdpTransitions.push_back(initTransitionsInBelief); - } - //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting - + // Expand the beliefs to generate the grid on-the-fly if (options.explorationThreshold > storm::utility::zero()) { STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl) } + expandedBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false); while (!beliefsToBeExpanded.empty()) { uint64_t currId = beliefsToBeExpanded.front(); - beliefsToBeExpanded.pop_front(); - bool isTarget = beliefIsTarget[currId]; - if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) { - // TODO: with rewards whe would have to assign the corresponding reward to this transition - mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one() - weightedSumOverMap[currId]}}}); - continue; - } - - if (isTarget) { - // Depending on whether we compute rewards, we select the right initial result - // MDP stuff - targetStates.push_back(beliefStateMap.left.at(currId)); - mdpTransitions.push_back({{{beliefStateMap.left.at(currId), storm::utility::one()}}}); + beliefsToBeExpanded.pop_front(); + expandedBeliefs.set(currId, true); // Do not expand this belief again. + assert(currId < expandedBeliefs.size()); + uint64_t currMdpState = beliefStateMap.left.at(currId); + auto const& currBelief = newBeliefGrid.getGridPoint(currId); + uint32_t currObservation = pomdp.getObservation(currBelief.begin()->first); + + mdpTransitionsBuilder.newRowGroup(mdpMatrixRow); + + if (targetObservations.count(currObservation) != 0) { + // Make this state absorbing + targetStates.push_back(currMdpState); + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one()); + ++mdpMatrixRow; + } else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) { + // TODO: with rewards we would have to assign the corresponding reward to this transition + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]); + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one() - weightedSumOverMap[currId]); + ++mdpMatrixRow; } else { - uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front(); - uint64_t numChoices = pomdp.getNumberOfChoices(representativeState); + auto const& currBelief = newBeliefGrid.getGridPoint(currId); + uint64_t someState = currBelief.begin()->first; + uint64_t numChoices = pomdp.getNumberOfChoices(someState); std::vector actionRewardsInState(numChoices); std::vector> transitionsInBelief; for (uint64_t action = 0; action < numChoices; ++action) { - std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currId], action); - std::map transitionInActionBelief; - for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { - uint32_t observation = iter->first; - // THIS CALL IS SLOW - // TODO speed this up - uint64_t idNextBelief = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currId], action, - observation, nextId); - nextId = beliefList.size(); - //Triangulate here and put the possibly resulting belief in the grid - std::vector> subSimplex; - std::vector lambdas; - if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) { - subSimplex = subSimplexCache[idNextBelief]; - lambdas = lambdaCache[idNextBelief]; - } else { - auto temp = computeSubSimplexAndLambdas(beliefList[idNextBelief].probabilities, - observationResolutionVector[beliefList[idNextBelief].observation], pomdp.getNumberOfStates()); - subSimplex = temp.first; - lambdas = temp.second; - if (options.cacheSubsimplices) { - subSimplexCache[idNextBelief] = subSimplex; - lambdaCache[idNextBelief] = lambdas; - } - } - - for (size_t j = 0; j < lambdas.size(); ++j) { - if (!cc.isEqual(lambdas[j], storm::utility::zero())) { - auto approxId = getBeliefIdInVector(beliefGrid, observation, subSimplex[j]); - if (approxId == uint64_t(-1)) { - // if the triangulated belief was not found in the list, we place it in the grid and add it to the work list - storm::pomdp::Belief gridBelief = {nextId, observation, subSimplex[j]}; - beliefList.push_back(gridBelief); - beliefGrid.push_back(gridBelief); - beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end()); - // compute overapproximate value using MDP result map - if (boundMapsSet) { - auto tempWeightedSumOver = storm::utility::zero(); - auto tempWeightedSumUnder = storm::utility::zero(); - for (uint64_t i = 0; i < subSimplex[j].size(); ++i) { - tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber(overMap[i]); - tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber(underMap[i]); - } - if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) { - hintVector.push_back(tempWeightedSumOver); - } else { - hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? storm::utility::one() - : storm::utility::zero()); - } - weightedSumOverMap[nextId] = tempWeightedSumOver; - weightedSumUnderMap[nextId] = tempWeightedSumUnder; - } else { - hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? storm::utility::one() - : storm::utility::zero()); - } - beliefsToBeExpanded.push_back(nextId); - beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId)); - transitionInActionBelief[mdpStateId] = iter->second * lambdas[j]; - ++nextId; - ++mdpStateId; + auto successorGridPoints = newBeliefGrid.expandAction(currId, action, observationResolutionVector); + // Check for newly found grid points + expandedBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false); + for (auto const& successor : successorGridPoints) { + auto successorId = successor.first; + auto successorBelief = newBeliefGrid.getGridPoint(successorId); + auto successorObservation = pomdp.getObservation(successorBelief.begin()->first); + if (!expandedBeliefs.get(successorId)) { + beliefsToBeExpanded.push_back(successorId); + beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId)); + ++nextMdpStateId; + + if (boundMapsSet) { + ValueType upperBound = getWeightedSum(successorBelief, overMap); + ValueType lowerBound = getWeightedSum(successorBelief, underMap); + if (cc.isEqual(upperBound, lowerBound)) { + hintVector.push_back(lowerBound); } else { - transitionInActionBelief[beliefStateMap.left.at(approxId)] = iter->second * lambdas[j]; + hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one() : storm::utility::zero()); } + weightedSumOverMap[successorId] = upperBound; + weightedSumUnderMap[successorId] = lowerBound; + } else { + hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one() : storm::utility::zero()); } } + auto successorMdpState = beliefStateMap.left.at(successorId); + // This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted. + mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second); } - if (!transitionInActionBelief.empty()) { - transitionsInBelief.push_back(transitionInActionBelief); - } + ++mdpMatrixRow; } - if (transitionsInBelief.empty()) { - std::map transitionInActionBelief; - transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one(); - transitionsInBelief.push_back(transitionInActionBelief); - } - mdpTransitions.push_back(transitionsInBelief); } if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildAborted = true; break; } } - statistics.overApproximationStates = mdpTransitions.size(); - STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl); + statistics.overApproximationStates = nextMdpStateId; STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); if (storm::utility::resources::isTerminate()) { statistics.overApproximationBuildTime.stop(); return nullptr; } - storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size()); + storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId); mdpLabeling.addLabel("init"); mdpLabeling.addLabel("target"); - mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBelief.id)); + mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialMdpState)); for (auto targetState : targetStates) { mdpLabeling.addLabelToState("target", targetState); } - storm::storage::sparse::ModelComponents modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling); - storm::models::sparse::Mdp overApproxMdp(modelComponents); + storm::storage::sparse::ModelComponents modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling)); + for (uint64_t row = 0; row < modelComponents.transitionMatrix.getRowCount(); ++row) { + if (!storm::utility::isOne(modelComponents.transitionMatrix.getRowSum(row))) { + std::cout << "Row " << row << " does not sum up to one. " << modelComponents.transitionMatrix.getRowSum(row) << " instead" << std::endl; + } + } + auto overApproxMdp = std::make_shared>(std::move(modelComponents)); if (computeRewards) { - storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); + storm::models::sparse::StandardRewardModel mdpRewardModel(boost::none, std::vector(mdpMatrixRow)); for (auto const &iter : beliefStateMap.left) { - auto currentBelief = beliefList[iter.first]; - auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); - for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) { + auto currentBelief = newBeliefGrid.getGridPoint(iter.first); + auto representativeState = currentBelief.begin()->first; + for (uint64_t action = 0; action < overApproxMdp->getNumberOfChoices(representativeState); ++action) { // Add the reward - mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)), - getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)), - currentBelief)); + uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action)); + uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)); + mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief)); } } - overApproxMdp.addRewardModel("std", mdpRewardModel); - overApproxMdp.restrictRewardModels(std::set({"std"})); + overApproxMdp->addRewardModel("default", mdpRewardModel); + overApproxMdp->restrictRewardModels(std::set({"default"})); } statistics.overApproximationBuildTime.stop(); STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); - overApproxMdp.printModelInformationToStream(std::cout); + overApproxMdp->printModelInformationToStream(std::cout); - auto model = std::make_shared>(overApproxMdp); - auto modelPtr = std::static_pointer_cast>(model); + auto modelPtr = std::static_pointer_cast>(overApproxMdp); std::string propertyString = computeRewards ? "R" : "P"; propertyString += min ? "min" : "max"; propertyString += "=? [F \"target\"]"; @@ -604,31 +558,34 @@ namespace storm { auto hintPtr = std::make_shared>(hint); task.setHint(hintPtr); statistics.overApproximationCheckTime.start(); - std::unique_ptr res(storm::api::verifyWithSparseEngine(model, task)); + std::unique_ptr res(storm::api::verifyWithSparseEngine(overApproxMdp, task)); statistics.overApproximationCheckTime.stop(); if (storm::utility::resources::isTerminate() && !res) { return nullptr; } STORM_LOG_ASSERT(res, "Result does not exist."); - res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true))); + res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true))); auto overApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)]; STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl); - //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, - maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); + //auto underApprox = weightedSumUnderMap[initialBelief.id]; + /* TODO: Enable under approx again: + auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize); if (storm::utility::resources::isTerminate() && !underApproxComponents) { return std::make_unique>( RefinementComponents{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id}); } STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); - return std::make_unique>( RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, underApproxComponents->underApproxBeliefStateMap, initialBelief.id}); + */ + return std::make_unique>(RefinementComponents{modelPtr, overApprox, storm::utility::zero(), overApproxResultMap, + {}, {}, {}, {}, beliefStateMap, {}, initialBelief.id}); + } template @@ -973,7 +930,8 @@ namespace storm { std::vector observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution); return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false); } - + + template std::unique_ptr> ApproximatePOMDPModelchecker::computeUnderapproximation(std::vector> &beliefList, @@ -1307,7 +1265,16 @@ namespace storm { } template - ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief) { + ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, std::map const& belief) { + auto result = storm::utility::zero(); + for (auto const &probEntry : belief) { + result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); + } + return result; + } + + template + ValueType ApproximatePOMDPModelchecker::getRewardAfterAction(uint64_t action, storm::pomdp::Belief const& belief) { auto result = storm::utility::zero(); for (auto const &probEntry : belief.probabilities) { result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix()); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 782220358..4820ea380 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -240,7 +240,8 @@ namespace storm { * @param belief the belief in which the action is performed * @return the reward earned by performing the action in the belief */ - ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief &belief); + ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief const& belief); + ValueType getRewardAfterAction(uint64_t action, std::map const& belief); struct Statistics { Statistics(); diff --git a/src/storm-pomdp/storage/BeliefGrid.h b/src/storm-pomdp/storage/BeliefGrid.h new file mode 100644 index 000000000..a75872ee4 --- /dev/null +++ b/src/storm-pomdp/storage/BeliefGrid.h @@ -0,0 +1,296 @@ +#pragma once + +#include +#include +//#include + +#include "storm/utility/macros.h" +#include "storm/exceptions/UnexpectedException.h" + +namespace storm { + namespace storage { + + template + class BeliefGrid { + public: + + typedef typename PomdpType::ValueType ValueType; + //typedef boost::container::flat_map BeliefType + typedef std::map BeliefType; + typedef uint64_t BeliefId; + + BeliefGrid(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) { + // Intentionally left empty + } + + struct Triangulation { + std::vector gridPoints; + std::vector weights; + uint64_t size() const { + return weights.size(); + } + }; + + BeliefType const& getGridPoint(BeliefId const& id) const { + return gridPoints[id]; + } + + BeliefId getIdOfGridPoint(BeliefType const& gridPoint) const { + auto idIt = gridPointToIdMap.find(gridPoint); + STORM_LOG_THROW(idIt != gridPointToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown grid state."); + return idIt->second; + } + + bool isEqual(BeliefType const& first, BeliefType const& second) const { + if (first.size() != second.size()) { + return false; + } + auto secondIt = second.begin(); + for (auto const& firstEntry : first) { + if (firstEntry.first != secondIt->first) { + return false; + } + if (!cc.isEqual(firstEntry.second, secondIt->second)) { + return false; + } + ++secondIt; + } + return true; + } + + bool assertBelief(BeliefType const& belief) const { + BeliefValueType sum = storm::utility::zero(); + boost::optional observation; + for (auto const& entry : belief) { + uintmax_t entryObservation = pomdp.getObservation(entry.first); + if (observation) { + if (observation.get() != entryObservation) { + STORM_LOG_ERROR("Beliefsupport contains different observations."); + return false; + } + } else { + observation = entryObservation; + } + if (cc.isZero(entry.second)) { + // We assume that beliefs only consider their support. + STORM_LOG_ERROR("Zero belief probability."); + return false; + } + if (cc.isLess(entry.second, storm::utility::zero())) { + STORM_LOG_ERROR("Negative belief probability."); + return false; + } + if (cc.isLess(storm::utility::one(), entry.second)) { + STORM_LOG_ERROR("Belief probability greater than one."); + return false; + } + sum += entry.second; + } + if (!cc.isOne(sum)) { + STORM_LOG_ERROR("Belief does not sum up to one."); + return false; + } + return true; + } + + bool assertTriangulation(BeliefType const& belief, Triangulation const& triangulation) const { + if (triangulation.weights.size() != triangulation.gridPoints.size()) { + STORM_LOG_ERROR("Number of weights and points in triangulation does not match."); + return false; + } + if (triangulation.size() == 0) { + STORM_LOG_ERROR("Empty triangulation."); + return false; + } + BeliefType triangulatedBelief; + BeliefValueType weightSum = storm::utility::zero(); + for (uint64_t i = 0; i < triangulation.weights.size(); ++i) { + if (cc.isZero(triangulation.weights[i])) { + STORM_LOG_ERROR("Zero weight in triangulation."); + return false; + } + if (cc.isLess(triangulation.weights[i], storm::utility::zero())) { + STORM_LOG_ERROR("Negative weight in triangulation."); + return false; + } + if (cc.isLess(storm::utility::one(), triangulation.weights[i])) { + STORM_LOG_ERROR("Weight greater than one in triangulation."); + } + weightSum += triangulation.weights[i]; + BeliefType const& gridPoint = getGridPoint(triangulation.gridPoints[i]); + for (auto const& pointEntry : gridPoint) { + BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero()).first->second; + triangulatedValue += triangulation.weights[i] * pointEntry.second; + } + } + if (!cc.isOne(weightSum)) { + STORM_LOG_ERROR("Triangulation weights do not sum up to one."); + return false; + } + if (!assertBelief(triangulatedBelief)) { + STORM_LOG_ERROR("Triangulated belief is not a belief."); + } + if (!isEqual(belief, triangulatedBelief)) { + STORM_LOG_ERROR("Belief does not match triangulated belief."); + return false; + } + return true; + } + + Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) { + //TODO this can also be simplified using the sparse vector interpretation + //TODO Enable chaching for this method? + STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid."); + + auto nrStates = pomdp.getNumberOfStates(); + + // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) + // Variable names are based on the paper + // TODO avoid reallocations for these vectors + std::vector x(nrStates); + std::vector v(nrStates); + std::vector d(nrStates); + auto convResolution = storm::utility::convertNumber(resolution); + + for (size_t i = 0; i < nrStates; ++i) { + for (auto const &probEntry : belief) { + if (probEntry.first >= i) { + x[i] += convResolution * probEntry.second; + } + } + v[i] = storm::utility::floor(x[i]); + d[i] = x[i] - v[i]; + } + + auto p = storm::utility::vector::getSortedIndices(d); + + std::vector> qs(nrStates, std::vector(nrStates)); + for (size_t i = 0; i < nrStates; ++i) { + if (i == 0) { + for (size_t j = 0; j < nrStates; ++j) { + qs[i][j] = v[j]; + } + } else { + for (size_t j = 0; j < nrStates; ++j) { + if (j == p[i - 1]) { + qs[i][j] = qs[i - 1][j] + storm::utility::one(); + } else { + qs[i][j] = qs[i - 1][j]; + } + } + } + } + + Triangulation result; + // The first weight is 1-sum(other weights). We therefore process the js in reverse order + BeliefValueType firstWeight = storm::utility::one(); + for (size_t j = nrStates; j > 0;) { + --j; + // First create the weights. The weights vector will be reversed at the end. + ValueType weight; + if (j == 0) { + weight = firstWeight; + } else { + weight = d[p[j - 1]] - d[p[j]]; + firstWeight -= weight; + } + if (!cc.isZero(weight)) { + result.weights.push_back(weight); + BeliefType gridPoint; + auto const& qsj = qs[j]; + for (size_t i = 0; i < nrStates - 1; ++i) { + BeliefValueType gridPointEntry = qsj[i] - qsj[i + 1]; + if (!cc.isZero(gridPointEntry)) { + gridPoint[i] = gridPointEntry / convResolution; + } + } + if (!cc.isZero(qsj[nrStates - 1])) { + gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution; + } + result.gridPoints.push_back(getOrAddGridPointId(gridPoint)); + } + } + std::reverse(result.weights.begin(), result.weights.end()); + + STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation."); + + return result; + } + + template + void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) { + auto insertionRes = distr.emplace(state, value); + if (!insertionRes.second) { + insertionRes.first->second += value; + } + } + + BeliefId getNumberOfGridPointIds() const { + return gridPoints.size(); + } + + std::map expandAction(BeliefId const& gridPointId, uint64_t actionIndex, std::vector const& observationResolutions) { + + std::map destinations; // The belief ids should be ordered + + BeliefType gridPoint = getGridPoint(gridPointId); + + // Find the probability we go to each observation + BeliefType successorObs; // This is actually not a belief but has the same type + for (auto const& pointEntry : gridPoint) { + uint64_t state = pointEntry.first; + for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { + if (!storm::utility::isZero(pomdpTransition.getValue())) { + auto obs = pomdp.getObservation(pomdpTransition.getColumn()); + addToDistribution(successorObs, obs, pointEntry.second * pomdpTransition.getValue()); + } + } + } + + // Now for each successor observation we find and triangulate the successor belief + for (auto const& successor : successorObs) { + BeliefType successorBelief; + for (auto const& pointEntry : gridPoint) { + uint64_t state = pointEntry.first; + for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) { + if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) { + ValueType prob = pointEntry.second * pomdpTransition.getValue() / successor.second; + addToDistribution(successorBelief, pomdpTransition.getColumn(), prob); + } + } + } + STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief."); + + Triangulation triangulation = triangulateBelief(successorBelief, observationResolutions[successor.first]); + for (size_t j = 0; j < triangulation.size(); ++j) { + addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second); + } + } + + return destinations; + + } + + private: + + BeliefId getOrAddGridPointId(BeliefType const& gridPoint) { + auto insertioRes = gridPointToIdMap.emplace(gridPoint, gridPoints.size()); + if (insertioRes.second) { + // There actually was an insertion, so add the new grid state + gridPoints.push_back(gridPoint); + } + // Return the id + return insertioRes.first->second; + } + + PomdpType const& pomdp; + uint64_t resolution; + + std::vector gridPoints; + std::map gridPointToIdMap; + storm::utility::ConstantsComparator cc; + + + }; + } +} \ No newline at end of file