
POMDP: Started to split belief logic from exploration logic.

Branch: main
Tim Quatmann, 5 years ago
parent commit 6fee61feb1
3 changed files:
  1. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (343 changes)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (3 changes)
  3. src/storm-pomdp/storage/BeliefGrid.h (296 changes)

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (343 changes)

@@ -1,5 +1,7 @@
#include "ApproximatePOMDPModelchecker.h"
#include <tuple>
#include <boost/algorithm/string.hpp>
#include "storm-pomdp/analysis/FormulaInformation.h"
@@ -21,6 +23,8 @@
#include "storm/api/export.h"
#include "storm-parsers/api/storm-parsers.h"
#include "storm-pomdp/storage/BeliefGrid.h"
#include "storm/utility/macros.h"
#include "storm/utility/SignalHandler.h"
#include "storm/exceptions/NotSupportedException.h"
@@ -322,6 +326,16 @@ namespace storm {
}
template <typename ValueType, typename BeliefType, typename SummandsType>
ValueType getWeightedSum(BeliefType const& belief, SummandsType const& summands) {
ValueType result = storm::utility::zero<ValueType>();
for (auto const& entry : belief) {
result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
}
return result;
}
template<typename ValueType, typename RewardModelType>
std::shared_ptr<RefinementComponents<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeFirstRefinementStep(std::set<uint32_t> const &targetObservations, bool min,
@@ -338,15 +352,14 @@
underMap = underApproximationMap.value();
}
std::vector<storm::pomdp::Belief<ValueType>> beliefList;
std::vector<bool> beliefIsTarget;
std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>, ValueType> newBeliefGrid(pomdp, options.numericPrecision);
//Use caching to avoid multiple computation of the subsimplices and lambdas
std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> subSimplexCache;
std::map<uint64_t, std::vector<ValueType>> lambdaCache;
bsmap_type beliefStateMap;
std::deque<uint64_t> beliefsToBeExpanded;
storm::storage::BitVector expandedBeliefs;
// current ID -> action -> reward
std::map<uint64_t, std::vector<ValueType>> beliefActionRewards;
@@ -355,244 +368,185 @@
// Initial belief always has belief ID 0
storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(nextId);
++nextId;
beliefList.push_back(initialBelief);
beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
// These are the components to build the MDP from the grid
// Reserve states 0 and 1 as always sink/goal states
std::vector<std::vector<std::map<uint64_t, ValueType>>> mdpTransitions = {{{{0, storm::utility::one<ValueType>()}}},
{{{1, storm::utility::one<ValueType>()}}}};
storm::storage::SparseMatrixBuilder<ValueType> mdpTransitionsBuilder(0, 0, 0, true, true);
uint64_t extraBottomState = 0;
uint64_t extraTargetState = computeRewards ? 0 : 1;
uint64_t nextMdpStateId = extraTargetState + 1;
uint64_t mdpMatrixRow = 0;
for (uint64_t state = 0; state < nextMdpStateId; ++state) {
mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, state, storm::utility::one<ValueType>());
++mdpMatrixRow;
}
// Hint vector for the MDP modelchecker (initialize with constant sink/goal values)
std::vector<ValueType> hintVector = {storm::utility::zero<ValueType>(), storm::utility::one<ValueType>()};
std::vector<uint64_t> targetStates = {1};
uint64_t mdpStateId = 2;
beliefStateMap.insert(bsmap_type::value_type(initialBelief.id, mdpStateId));
++mdpStateId;
std::vector<ValueType> hintVector(nextMdpStateId, storm::utility::zero<ValueType>());
if (!computeRewards) {
hintVector[extraTargetState] = storm::utility::one<ValueType>();
}
std::vector<uint64_t> targetStates = {extraTargetState};
// Map to save the weighted values resulting from the preprocessing for the beliefs / indices in beliefSpace
std::map<uint64_t, ValueType> weightedSumOverMap;
std::map<uint64_t, ValueType> weightedSumUnderMap;
// for the initial belief, add the triangulated initial states
auto initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, observationResolutionVector[initialBelief.observation], pomdp.getNumberOfStates());
auto initSubSimplex = initTemp.first;
auto initLambdas = initTemp.second;
auto triangulation = newBeliefGrid.triangulateBelief(initialBelief.probabilities, observationResolutionVector[initialBelief.observation]);
if (options.cacheSubsimplices) {
subSimplexCache[0] = initSubSimplex;
lambdaCache[0] = initLambdas;
//subSimplexCache[0] = initSubSimplex;
//lambdaCache[0] = initLambdas;
}
std::vector<std::map<uint64_t, ValueType>> initTransitionsInBelief;
std::map<uint64_t, ValueType> initTransitionInActionBelief;
bool initInserted = false;
for (size_t j = 0; j < initLambdas.size(); ++j) {
if (!cc.isEqual(initLambdas[j], storm::utility::zero<ValueType>())) {
uint64_t searchResult = getBeliefIdInVector(beliefList, initialBelief.observation, initSubSimplex[j]);
if (searchResult == uint64_t(-1) || (searchResult == 0 && !initInserted)) {
if (searchResult == 0) {
// the initial belief is on the grid itself
if (boundMapsSet) {
auto tempWeightedSumOver = storm::utility::zero<ValueType>();
auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) {
tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
}
weightedSumOverMap[initialBelief.id] = tempWeightedSumOver;
weightedSumUnderMap[initialBelief.id] = tempWeightedSumUnder;
}
initInserted = true;
beliefGrid.push_back(initialBelief);
beliefsToBeExpanded.push_back(0);
hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
: storm::utility::zero<ValueType>());
} else {
// if the triangulated belief was not found in the list, we place it in the grid and add it to the work list
if (boundMapsSet) {
auto tempWeightedSumOver = storm::utility::zero<ValueType>();
auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
for (uint64_t i = 0; i < initSubSimplex[j].size(); ++i) {
tempWeightedSumOver += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
tempWeightedSumUnder += initSubSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
}
weightedSumOverMap[nextId] = tempWeightedSumOver;
weightedSumUnderMap[nextId] = tempWeightedSumUnder;
}
storm::pomdp::Belief<ValueType> gridBelief = {nextId, initialBelief.observation, initSubSimplex[j]};
beliefList.push_back(gridBelief);
beliefGrid.push_back(gridBelief);
beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
beliefsToBeExpanded.push_back(nextId);
++nextId;
hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
uint64_t initialMdpState = nextMdpStateId;
++nextMdpStateId;
if (triangulation.size() == 1) {
// The initial belief is on the grid itself
auto initBeliefId = triangulation.gridPoints.front();
if (boundMapsSet) {
auto const& gridPoint = newBeliefGrid.getGridPoint(initBeliefId);
weightedSumOverMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, overMap);
weightedSumUnderMap[initBeliefId] = getWeightedSum<ValueType>(gridPoint, underMap);
}
beliefsToBeExpanded.push_back(initBeliefId);
beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints.front(), initialMdpState));
hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
: storm::utility::zero<ValueType>());
beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId));
initTransitionInActionBelief[mdpStateId] = initLambdas[j];
++nextId;
++mdpStateId;
}
} else {
// If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
for (uint64_t i = 0; i < triangulation.size(); ++i) {
beliefsToBeExpanded.push_back(triangulation.gridPoints[i]);
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, nextMdpStateId, triangulation.weights[i]);
beliefStateMap.insert(bsmap_type::value_type(triangulation.gridPoints[i], nextMdpStateId));
++nextMdpStateId;
if (boundMapsSet) {
auto const& gridPoint = newBeliefGrid.getGridPoint(triangulation.gridPoints[i]);
weightedSumOverMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, overMap);
weightedSumUnderMap[triangulation.gridPoints[i]] = getWeightedSum<ValueType>(gridPoint, underMap);
}
hintVector.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end() ? storm::utility::one<ValueType>()
: storm::utility::zero<ValueType>());
}
//beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
++mdpMatrixRow;
}
// If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
if (!initTransitionInActionBelief.empty()) {
initTransitionsInBelief.push_back(initTransitionInActionBelief);
mdpTransitions.push_back(initTransitionsInBelief);
}
//beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
// Expand the beliefs to generate the grid on-the-fly
if (options.explorationThreshold > storm::utility::zero<ValueType>()) {
STORM_PRINT("Exploration threshold: " << options.explorationThreshold << std::endl)
}
expandedBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false);
while (!beliefsToBeExpanded.empty()) {
uint64_t currId = beliefsToBeExpanded.front();
beliefsToBeExpanded.pop_front();
bool isTarget = beliefIsTarget[currId];
if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
// TODO: with rewards we would have to assign the corresponding reward to this transition
mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one<ValueType>() - weightedSumOverMap[currId]}}});
continue;
}
if (isTarget) {
// Depending on whether we compute rewards, we select the right initial result
// MDP stuff
targetStates.push_back(beliefStateMap.left.at(currId));
mdpTransitions.push_back({{{beliefStateMap.left.at(currId), storm::utility::one<ValueType>()}}});
beliefsToBeExpanded.pop_front();
expandedBeliefs.set(currId, true); // Do not expand this belief again.
assert(currId < expandedBeliefs.size());
uint64_t currMdpState = beliefStateMap.left.at(currId);
auto const& currBelief = newBeliefGrid.getGridPoint(currId);
uint32_t currObservation = pomdp.getObservation(currBelief.begin()->first);
mdpTransitionsBuilder.newRowGroup(mdpMatrixRow);
if (targetObservations.count(currObservation) != 0) {
// Make this state absorbing
targetStates.push_back(currMdpState);
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, currMdpState, storm::utility::one<ValueType>());
++mdpMatrixRow;
} else if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
// TODO: with rewards we would have to assign the corresponding reward to this transition
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraTargetState, weightedSumOverMap[currId]);
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, extraBottomState, storm::utility::one<ValueType>() - weightedSumOverMap[currId]);
++mdpMatrixRow;
} else { } else {
uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front();
uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
auto const& currBelief = newBeliefGrid.getGridPoint(currId);
uint64_t someState = currBelief.begin()->first;
uint64_t numChoices = pomdp.getNumberOfChoices(someState);
std::vector<ValueType> actionRewardsInState(numChoices);
std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
for (uint64_t action = 0; action < numChoices; ++action) {
std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(beliefList[currId], action);
std::map<uint64_t, ValueType> transitionInActionBelief;
for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
uint32_t observation = iter->first;
// THIS CALL IS SLOW
// TODO speed this up
uint64_t idNextBelief = getBeliefAfterActionAndObservation(beliefList, beliefIsTarget, targetObservations, beliefList[currId], action,
observation, nextId);
nextId = beliefList.size();
//Triangulate here and put the possibly resulting belief in the grid
std::vector<std::map<uint64_t, ValueType>> subSimplex;
std::vector<ValueType> lambdas;
if (options.cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) {
subSimplex = subSimplexCache[idNextBelief];
lambdas = lambdaCache[idNextBelief];
} else {
auto temp = computeSubSimplexAndLambdas(beliefList[idNextBelief].probabilities,
observationResolutionVector[beliefList[idNextBelief].observation], pomdp.getNumberOfStates());
subSimplex = temp.first;
lambdas = temp.second;
if (options.cacheSubsimplices) {
subSimplexCache[idNextBelief] = subSimplex;
lambdaCache[idNextBelief] = lambdas;
}
}
for (size_t j = 0; j < lambdas.size(); ++j) {
if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
auto approxId = getBeliefIdInVector(beliefGrid, observation, subSimplex[j]);
if (approxId == uint64_t(-1)) {
// if the triangulated belief was not found in the list, we place it in the grid and add it to the work list
storm::pomdp::Belief<ValueType> gridBelief = {nextId, observation, subSimplex[j]};
beliefList.push_back(gridBelief);
beliefGrid.push_back(gridBelief);
beliefIsTarget.push_back(targetObservations.find(observation) != targetObservations.end());
// compute overapproximate value using MDP result map
if (boundMapsSet) {
auto tempWeightedSumOver = storm::utility::zero<ValueType>();
auto tempWeightedSumUnder = storm::utility::zero<ValueType>();
for (uint64_t i = 0; i < subSimplex[j].size(); ++i) {
tempWeightedSumOver += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(overMap[i]);
tempWeightedSumUnder += subSimplex[j][i] * storm::utility::convertNumber<ValueType>(underMap[i]);
}
if (cc.isEqual(tempWeightedSumOver, tempWeightedSumUnder)) {
hintVector.push_back(tempWeightedSumOver);
} else {
hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? storm::utility::one<ValueType>()
: storm::utility::zero<ValueType>());
}
weightedSumOverMap[nextId] = tempWeightedSumOver;
weightedSumUnderMap[nextId] = tempWeightedSumUnder;
} else {
hintVector.push_back(targetObservations.find(observation) != targetObservations.end() ? storm::utility::one<ValueType>()
: storm::utility::zero<ValueType>());
}
beliefsToBeExpanded.push_back(nextId);
beliefStateMap.insert(bsmap_type::value_type(nextId, mdpStateId));
transitionInActionBelief[mdpStateId] = iter->second * lambdas[j];
++nextId;
++mdpStateId;
auto successorGridPoints = newBeliefGrid.expandAction(currId, action, observationResolutionVector);
// Check for newly found grid points
expandedBeliefs.grow(newBeliefGrid.getNumberOfGridPointIds(), false);
for (auto const& successor : successorGridPoints) {
auto successorId = successor.first;
auto successorBelief = newBeliefGrid.getGridPoint(successorId);
auto successorObservation = pomdp.getObservation(successorBelief.begin()->first);
if (!expandedBeliefs.get(successorId)) {
beliefsToBeExpanded.push_back(successorId);
beliefStateMap.insert(bsmap_type::value_type(successorId, nextMdpStateId));
++nextMdpStateId;
if (boundMapsSet) {
ValueType upperBound = getWeightedSum<ValueType>(successorBelief, overMap);
ValueType lowerBound = getWeightedSum<ValueType>(successorBelief, underMap);
if (cc.isEqual(upperBound, lowerBound)) {
hintVector.push_back(lowerBound);
} else { } else {
transitionInActionBelief[beliefStateMap.left.at(approxId)] = iter->second * lambdas[j];
hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
}
weightedSumOverMap[successorId] = upperBound;
weightedSumUnderMap[successorId] = lowerBound;
} else {
hintVector.push_back(targetObservations.count(successorObservation) == 1 ? storm::utility::one<ValueType>() : storm::utility::zero<ValueType>());
}
}
auto successorMdpState = beliefStateMap.left.at(successorId);
// This assumes that the successor MDP states are given in ascending order, which is indeed the case because the successorGridPoints are sorted.
mdpTransitionsBuilder.addNextValue(mdpMatrixRow, successorMdpState, successor.second);
}
if (!transitionInActionBelief.empty()) {
transitionsInBelief.push_back(transitionInActionBelief);
}
++mdpMatrixRow;
}
if (transitionsInBelief.empty()) {
std::map<uint64_t, ValueType> transitionInActionBelief;
transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one<ValueType>();
transitionsInBelief.push_back(transitionInActionBelief);
}
mdpTransitions.push_back(transitionsInBelief);
}
if (storm::utility::resources::isTerminate()) {
statistics.overApproximationBuildAborted = true;
break;
}
}
statistics.overApproximationStates = mdpTransitions.size();
STORM_PRINT("Grid size: " << beliefGrid.size() << std::endl);
statistics.overApproximationStates = nextMdpStateId;
STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl); STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
if (storm::utility::resources::isTerminate()) { if (storm::utility::resources::isTerminate()) {
statistics.overApproximationBuildTime.stop(); statistics.overApproximationBuildTime.stop();
return nullptr; return nullptr;
} }
storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size());
storm::models::sparse::StateLabeling mdpLabeling(nextMdpStateId);
mdpLabeling.addLabel("init"); mdpLabeling.addLabel("init");
mdpLabeling.addLabel("target"); mdpLabeling.addLabel("target");
mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialBelief.id));
mdpLabeling.addLabelToState("init", beliefStateMap.left.at(initialMdpState));
for (auto targetState : targetStates) {
mdpLabeling.addLabelToState("target", targetState);
}
storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling);
storm::models::sparse::Mdp<ValueType, RewardModelType> overApproxMdp(modelComponents);
storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(mdpTransitionsBuilder.build(mdpMatrixRow, nextMdpStateId, nextMdpStateId), std::move(mdpLabeling));
for (uint64_t row = 0; row < modelComponents.transitionMatrix.getRowCount(); ++row) {
if (!storm::utility::isOne(modelComponents.transitionMatrix.getRowSum(row))) {
std::cout << "Row " << row << " does not sum up to one. " << modelComponents.transitionMatrix.getRowSum(row) << " instead" << std::endl;
}
}
auto overApproxMdp = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(std::move(modelComponents));
if (computeRewards) {
storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(mdpMatrixRow));
for (auto const &iter : beliefStateMap.left) {
auto currentBelief = beliefList[iter.first];
auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) {
auto currentBelief = newBeliefGrid.getGridPoint(iter.first);
auto representativeState = currentBelief.begin()->first;
for (uint64_t action = 0; action < overApproxMdp->getNumberOfChoices(representativeState); ++action) {
// Add the reward
mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
currentBelief));
uint64_t mdpChoice = overApproxMdp->getChoiceIndex(storm::storage::StateActionPair(iter.second, action));
uint64_t pomdpChoice = pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action));
mdpRewardModel.setStateActionReward(mdpChoice, getRewardAfterAction(pomdpChoice, currentBelief));
}
}
overApproxMdp.addRewardModel("std", mdpRewardModel);
overApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
overApproxMdp->addRewardModel("default", mdpRewardModel);
overApproxMdp->restrictRewardModels(std::set<std::string>({"default"}));
}
statistics.overApproximationBuildTime.stop();
STORM_PRINT("Over Approximation MDP build took " << statistics.overApproximationBuildTime << " seconds." << std::endl);
overApproxMdp.printModelInformationToStream(std::cout);
overApproxMdp->printModelInformationToStream(std::cout);
auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(overApproxMdp);
std::string propertyString = computeRewards ? "R" : "P";
propertyString += min ? "min" : "max";
propertyString += "=? [F \"target\"]";
@@ -604,31 +558,34 @@ namespace storm {
auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
task.setHint(hintPtr);
statistics.overApproximationCheckTime.start();
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, task));
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(overApproxMdp, task));
statistics.overApproximationCheckTime.stop();
if (storm::utility::resources::isTerminate() && !res) {
return nullptr;
}
STORM_LOG_ASSERT(res, "Result does not exist.");
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp.getNumberOfStates(), true)));
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(overApproxMdp->getNumberOfStates(), true)));
auto overApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)];
STORM_PRINT("Time Overapproximation: " << statistics.overApproximationCheckTime << " seconds." << std::endl);
//auto underApprox = weightedSumUnderMap[initialBelief.id];
auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards,
maxUaModelSize);
STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
//auto underApprox = weightedSumUnderMap[initialBelief.id];
/* TODO: Enable under approx again:
auto underApproxComponents = computeUnderapproximation(beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize);
if (storm::utility::resources::isTerminate() && !underApproxComponents) {
return std::make_unique<RefinementComponents<ValueType>>(
RefinementComponents<ValueType>{modelPtr, overApprox, 0, overApproxResultMap, {}, beliefList, beliefGrid, beliefIsTarget, beliefStateMap, {}, initialBelief.id});
}
STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
return std::make_unique<RefinementComponents<ValueType>>(
RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
underApproxComponents->underApproxMap, beliefList, beliefGrid, beliefIsTarget, beliefStateMap,
underApproxComponents->underApproxBeliefStateMap, initialBelief.id});
*/
return std::make_unique<RefinementComponents<ValueType>>(RefinementComponents<ValueType>{modelPtr, overApprox, storm::utility::zero<ValueType>(), overApproxResultMap,
{}, {}, {}, {}, beliefStateMap, {}, initialBelief.id});
}
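The nested std::map representation of the MDP transitions is replaced above by storm's SparseMatrixBuilder. A minimal sketch of the builder protocol as used in this commit (same flags: dimensions resolved at build time, custom row grouping; one row group per MDP state, one row per action; buildTinyMatrix is illustrative only):

#include <cstdint>
#include "storm/storage/SparseMatrix.h"

storm::storage::SparseMatrix<double> buildTinyMatrix() {
    storm::storage::SparseMatrixBuilder<double> builder(0, 0, 0, true, true);
    uint64_t row = 0;
    builder.newRowGroup(row);           // state 0: a single action with a self-loop
    builder.addNextValue(row, 0, 1.0);
    ++row;
    builder.newRowGroup(row);           // state 1: two actions
    builder.addNextValue(row, 0, 0.5);  // columns must be added in ascending order per row
    builder.addNextValue(row, 1, 0.5);
    ++row;
    builder.addNextValue(row, 1, 1.0);
    ++row;
    return builder.build(row, 2, 2);    // row count, column count, row group count
}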
template<typename ValueType, typename RewardModelType>
@@ -973,7 +930,8 @@ namespace storm {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), options.initialGridResolution);
return computeReachabilityOTF(targetObservations, min, observationResolutionVector, false);
}
template<typename ValueType, typename RewardModelType>
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
@@ -1307,7 +1265,16 @@
}
template<typename ValueType, typename RewardModelType>
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> &belief) {
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief) {
auto result = storm::utility::zero<ValueType>();
for (auto const &probEntry : belief) {
result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
}
return result;
}
template<typename ValueType, typename RewardModelType>
ValueType ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief) {
auto result = storm::utility::zero<ValueType>();
for (auto const &probEntry : belief.probabilities) {
result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
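Both overloads of getRewardAfterAction compute the same quantity: the immediate reward of an action, averaged over the states in the belief's support. A standalone numeric sketch with hypothetical rewards:

#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

int main() {
    // Belief with mass 0.4 on state 0 and 0.6 on state 3.
    std::map<uint64_t, double> belief = {{0, 0.4}, {3, 0.6}};
    // Hypothetical state-action rewards of one fixed action, per state.
    std::vector<double> actionReward = {2.0, 0.0, 0.0, 5.0};
    double result = 0.0;
    for (auto const& probEntry : belief) {
        result += probEntry.second * actionReward.at(probEntry.first);
    }
    std::cout << result << std::endl; // 0.4 * 2.0 + 0.6 * 5.0 = 3.8
    return 0;
}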

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (3 changes)

@@ -240,7 +240,8 @@
* @param belief the belief in which the action is performed
* @return the reward earned by performing the action in the belief
*/
ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> &belief);
ValueType getRewardAfterAction(uint64_t action, storm::pomdp::Belief<ValueType> const& belief);
ValueType getRewardAfterAction(uint64_t action, std::map<uint64_t, ValueType> const& belief);
struct Statistics {
Statistics();

src/storm-pomdp/storage/BeliefGrid.h (296 changes)

@@ -0,0 +1,296 @@
#pragma once
#include <map>
#include <boost/optional.hpp>
//#include <boost/container/flat_map.hpp>
#include "storm/utility/macros.h"
#include "storm/exceptions/UnexpectedException.h"
namespace storm {
namespace storage {
template <typename PomdpType, typename BeliefValueType, typename StateType = uint64_t>
class BeliefGrid {
public:
typedef typename PomdpType::ValueType ValueType;
//typedef boost::container::flat_map<StateType, BeliefValueType> BeliefType
typedef std::map<StateType, BeliefValueType> BeliefType;
typedef uint64_t BeliefId;
BeliefGrid(PomdpType const& pomdp, BeliefValueType const& precision) : pomdp(pomdp), cc(precision, false) {
// Intentionally left empty
}
struct Triangulation {
std::vector<BeliefId> gridPoints;
std::vector<BeliefValueType> weights;
uint64_t size() const {
return weights.size();
}
};
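A Triangulation encodes a belief as a convex combination of grid points. A hypothetical helper that makes this contract explicit (recombine is illustrative, not part of the commit; it computes exactly what assertTriangulation below verifies):

template<typename Grid>
typename Grid::BeliefType recombine(Grid const& grid, typename Grid::Triangulation const& t) {
    typename Grid::BeliefType result;
    for (uint64_t i = 0; i < t.size(); ++i) {
        for (auto const& entry : grid.getGridPoint(t.gridPoints[i])) {
            // std::map's operator[] value-initializes missing entries to zero.
            result[entry.first] += t.weights[i] * entry.second;
        }
    }
    return result; // should equal the belief the triangulation was computed from
}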
BeliefType const& getGridPoint(BeliefId const& id) const {
return gridPoints[id];
}
BeliefId getIdOfGridPoint(BeliefType const& gridPoint) const {
auto idIt = gridPointToIdMap.find(gridPoint);
STORM_LOG_THROW(idIt != gridPointToIdMap.end(), storm::exceptions::UnexpectedException, "Unknown grid state.");
return idIt->second;
}
bool isEqual(BeliefType const& first, BeliefType const& second) const {
if (first.size() != second.size()) {
return false;
}
auto secondIt = second.begin();
for (auto const& firstEntry : first) {
if (firstEntry.first != secondIt->first) {
return false;
}
if (!cc.isEqual(firstEntry.second, secondIt->second)) {
return false;
}
++secondIt;
}
return true;
}
bool assertBelief(BeliefType const& belief) const {
BeliefValueType sum = storm::utility::zero<BeliefValueType>();
boost::optional<uint32_t> observation;
for (auto const& entry : belief) {
uint32_t entryObservation = pomdp.getObservation(entry.first);
if (observation) {
if (observation.get() != entryObservation) {
STORM_LOG_ERROR("Belief support contains different observations.");
return false;
}
} else {
observation = entryObservation;
}
if (cc.isZero(entry.second)) {
// We assume that beliefs only consider their support.
STORM_LOG_ERROR("Zero belief probability.");
return false;
}
if (cc.isLess(entry.second, storm::utility::zero<BeliefValueType>())) {
STORM_LOG_ERROR("Negative belief probability.");
return false;
}
if (cc.isLess(storm::utility::one<BeliefValueType>(), entry.second)) {
STORM_LOG_ERROR("Belief probability greater than one.");
return false;
}
sum += entry.second;
}
if (!cc.isOne(sum)) {
STORM_LOG_ERROR("Belief does not sum up to one.");
return false;
}
return true;
}
bool assertTriangulation(BeliefType const& belief, Triangulation const& triangulation) const {
if (triangulation.weights.size() != triangulation.gridPoints.size()) {
STORM_LOG_ERROR("Number of weights and points in triangulation does not match.");
return false;
}
if (triangulation.size() == 0) {
STORM_LOG_ERROR("Empty triangulation.");
return false;
}
BeliefType triangulatedBelief;
BeliefValueType weightSum = storm::utility::zero<BeliefValueType>();
for (uint64_t i = 0; i < triangulation.weights.size(); ++i) {
if (cc.isZero(triangulation.weights[i])) {
STORM_LOG_ERROR("Zero weight in triangulation.");
return false;
}
if (cc.isLess(triangulation.weights[i], storm::utility::zero<BeliefValueType>())) {
STORM_LOG_ERROR("Negative weight in triangulation.");
return false;
}
if (cc.isLess(storm::utility::one<BeliefValueType>(), triangulation.weights[i])) {
STORM_LOG_ERROR("Weight greater than one in triangulation.");
return false;
}
weightSum += triangulation.weights[i];
BeliefType const& gridPoint = getGridPoint(triangulation.gridPoints[i]);
for (auto const& pointEntry : gridPoint) {
BeliefValueType& triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero<BeliefValueType>()).first->second;
triangulatedValue += triangulation.weights[i] * pointEntry.second;
}
}
if (!cc.isOne(weightSum)) {
STORM_LOG_ERROR("Triangulation weights do not sum up to one.");
return false;
}
if (!assertBelief(triangulatedBelief)) {
STORM_LOG_ERROR("Triangulated belief is not a belief.");
return false;
}
if (!isEqual(belief, triangulatedBelief)) {
STORM_LOG_ERROR("Belief does not match triangulated belief.");
return false;
}
return true;
}
Triangulation triangulateBelief(BeliefType belief, uint64_t resolution) {
//TODO this can also be simplified using the sparse vector interpretation
//TODO Enable caching for this method?
STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid.");
auto nrStates = pomdp.getNumberOfStates();
// This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
// Variable names are based on the paper
// TODO avoid reallocations for these vectors
std::vector<BeliefValueType> x(nrStates);
std::vector<BeliefValueType> v(nrStates);
std::vector<BeliefValueType> d(nrStates);
auto convResolution = storm::utility::convertNumber<BeliefValueType>(resolution);
for (size_t i = 0; i < nrStates; ++i) {
for (auto const &probEntry : belief) {
if (probEntry.first >= i) {
x[i] += convResolution * probEntry.second;
}
}
v[i] = storm::utility::floor(x[i]);
d[i] = x[i] - v[i];
}
auto p = storm::utility::vector::getSortedIndices(d);
std::vector<std::vector<BeliefValueType>> qs(nrStates, std::vector<BeliefValueType>(nrStates));
for (size_t i = 0; i < nrStates; ++i) {
if (i == 0) {
for (size_t j = 0; j < nrStates; ++j) {
qs[i][j] = v[j];
}
} else {
for (size_t j = 0; j < nrStates; ++j) {
if (j == p[i - 1]) {
qs[i][j] = qs[i - 1][j] + storm::utility::one<BeliefValueType>();
} else {
qs[i][j] = qs[i - 1][j];
}
}
}
}
Triangulation result;
// The first weight is 1-sum(other weights). We therefore process the js in reverse order
BeliefValueType firstWeight = storm::utility::one<BeliefValueType>();
for (size_t j = nrStates; j > 0;) {
--j;
// First create the weights. The weights vector will be reversed at the end.
BeliefValueType weight;
if (j == 0) {
weight = firstWeight;
} else {
weight = d[p[j - 1]] - d[p[j]];
firstWeight -= weight;
}
if (!cc.isZero(weight)) {
result.weights.push_back(weight);
BeliefType gridPoint;
auto const& qsj = qs[j];
for (size_t i = 0; i < nrStates - 1; ++i) {
BeliefValueType gridPointEntry = qsj[i] - qsj[i + 1];
if (!cc.isZero(gridPointEntry)) {
gridPoint[i] = gridPointEntry / convResolution;
}
}
if (!cc.isZero(qsj[nrStates - 1])) {
gridPoint[nrStates - 1] = qsj[nrStates - 1] / convResolution;
}
result.gridPoints.push_back(getOrAddGridPointId(gridPoint));
}
}
std::reverse(result.weights.begin(), result.weights.end());
STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation.");
return result;
}
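For intuition, a standalone numeric walk-through of this triangulation (plain doubles, no storm types; everything below is local to the illustration) for the dense belief (0.3, 0.7) at resolution N = 4:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
    std::vector<double> belief = {0.3, 0.7};
    double const N = 4.0;
    std::size_t const n = belief.size();
    // x[i] = N * sum_{j >= i} belief[j]; v = integer part, d = fractional part
    std::vector<double> x(n, 0.0), v(n), d(n);
    for (std::size_t i = 0; i < n; ++i) {
        for (std::size_t j = i; j < n; ++j) { x[i] += N * belief[j]; }
        v[i] = std::floor(x[i]);
        d[i] = x[i] - v[i];
    }
    // p: indices sorted by decreasing fractional part
    std::vector<std::size_t> p(n);
    std::iota(p.begin(), p.end(), 0);
    std::sort(p.begin(), p.end(), [&d](std::size_t a, std::size_t b) { return d[a] > d[b]; });
    // q[0] = v; q[i] = q[i-1] plus a unit step in direction p[i-1]
    std::vector<std::vector<double>> q(n, v);
    for (std::size_t i = 1; i < n; ++i) {
        q[i] = q[i - 1];
        q[i][p[i - 1]] += 1.0;
    }
    // weights: w[j] = d[p[j-1]] - d[p[j]] for j > 0; w[0] = 1 - sum of the others
    std::vector<double> w(n, 0.0);
    double rest = 0.0;
    for (std::size_t j = n; j-- > 1;) { w[j] = d[p[j - 1]] - d[p[j]]; rest += w[j]; }
    w[0] = 1.0 - rest;
    for (std::size_t j = 0; j < n; ++j) {
        if (w[j] == 0.0) { continue; } // exact compare suffices here; the real code uses a ConstantsComparator
        std::cout << "weight " << w[j] << " at (";
        for (std::size_t i = 0; i < n; ++i) {
            // grid point entries: (q[j][i] - q[j][i+1]) / N, last entry q[j][n-1] / N
            double entry = (i + 1 < n ? q[j][i] - q[j][i + 1] : q[j][i]) / N;
            std::cout << entry << (i + 1 < n ? ", " : ")\n");
        }
    }
    // Prints: weight 0.2 at (0.5, 0.5) and weight 0.8 at (0.25, 0.75);
    // indeed 0.2 * (0.5, 0.5) + 0.8 * (0.25, 0.75) = (0.3, 0.7).
    return 0;
}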
template<typename DistributionType>
void addToDistribution(DistributionType& distr, StateType const& state, BeliefValueType const& value) {
auto insertionRes = distr.emplace(state, value);
if (!insertionRes.second) {
insertionRes.first->second += value;
}
}
BeliefId getNumberOfGridPointIds() const {
return gridPoints.size();
}
std::map<BeliefId, ValueType> expandAction(BeliefId const& gridPointId, uint64_t actionIndex, std::vector<uint64_t> const& observationResolutions) {
std::map<BeliefId, ValueType> destinations; // The belief ids should be ordered
BeliefType gridPoint = getGridPoint(gridPointId);
// Find the probability we go to each observation
BeliefType successorObs; // This is actually not a belief but has the same type
for (auto const& pointEntry : gridPoint) {
uint64_t state = pointEntry.first;
for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
if (!storm::utility::isZero(pomdpTransition.getValue())) {
auto obs = pomdp.getObservation(pomdpTransition.getColumn());
addToDistribution(successorObs, obs, pointEntry.second * pomdpTransition.getValue());
}
}
}
// Now for each successor observation we find and triangulate the successor belief
for (auto const& successor : successorObs) {
BeliefType successorBelief;
for (auto const& pointEntry : gridPoint) {
uint64_t state = pointEntry.first;
for (auto const& pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) {
ValueType prob = pointEntry.second * pomdpTransition.getValue() / successor.second;
addToDistribution(successorBelief, pomdpTransition.getColumn(), prob);
}
}
}
STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief.");
Triangulation triangulation = triangulateBelief(successorBelief, observationResolutions[successor.first]);
for (size_t j = 0; j < triangulation.size(); ++j) {
addToDistribution(destinations, triangulation.gridPoints[j], triangulation.weights[j] * successor.second);
}
}
return destinations;
}
private:
BeliefId getOrAddGridPointId(BeliefType const& gridPoint) {
auto insertionRes = gridPointToIdMap.emplace(gridPoint, gridPoints.size());
if (insertionRes.second) {
// There actually was an insertion, so add the new grid state
gridPoints.push_back(gridPoint);
}
// Return the id
return insertionRes.first->second;
}
PomdpType const& pomdp;
uint64_t resolution;
std::vector<BeliefType> gridPoints;
std::map<BeliefType, BeliefId> gridPointToIdMap;
storm::utility::ConstantsComparator<ValueType> cc;
};
}
}
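Taken together, the BeliefGrid interface supports a compact exploration loop. A minimal sketch of how it composes, mirroring the loop in computeFirstRefinementStep but without MDP construction, bounds, or rewards (exploreBeliefGrid and its parameters are illustrative, not part of the commit):

#include <cstdint>
#include <deque>
#include <map>
#include <vector>
#include "storm-pomdp/storage/BeliefGrid.h"
#include "storm/models/sparse/Pomdp.h"
#include "storm/storage/BitVector.h"

template<typename ValueType>
void exploreBeliefGrid(storm::models::sparse::Pomdp<ValueType> const& pomdp,
                       std::map<uint64_t, ValueType> const& initialBelief,
                       std::vector<uint64_t> const& observationResolutions,
                       ValueType const& precision) {
    storm::storage::BeliefGrid<storm::models::sparse::Pomdp<ValueType>, ValueType> grid(pomdp, precision);
    std::deque<uint64_t> beliefsToBeExpanded;
    storm::storage::BitVector expandedBeliefs;
    // Seed the queue with the grid points of the triangulated initial belief.
    uint64_t initialObservation = pomdp.getObservation(initialBelief.begin()->first);
    auto triangulation = grid.triangulateBelief(initialBelief, observationResolutions[initialObservation]);
    for (auto const& gridPointId : triangulation.gridPoints) {
        beliefsToBeExpanded.push_back(gridPointId);
    }
    while (!beliefsToBeExpanded.empty()) {
        uint64_t currId = beliefsToBeExpanded.front();
        beliefsToBeExpanded.pop_front();
        expandedBeliefs.grow(grid.getNumberOfGridPointIds(), false);
        if (expandedBeliefs.get(currId)) {
            continue; // already expanded via another predecessor
        }
        expandedBeliefs.set(currId, true);
        // All states in a belief's support share one observation, so any
        // support state determines the number of available actions.
        uint64_t someState = grid.getGridPoint(currId).begin()->first;
        for (uint64_t action = 0; action < pomdp.getNumberOfChoices(someState); ++action) {
            // expandAction triangulates every successor belief and returns a
            // distribution over (possibly fresh) grid point ids.
            for (auto const& successor : grid.expandAction(currId, action, observationResolutions)) {
                beliefsToBeExpanded.push_back(successor.first);
            }
        }
    }
}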