
Added naive underapproximation by unrolling the belief support up to a given size

tempestpy_adaptions
Alexander Bork, 5 years ago
parent
commit a5fc9fc5b7
Changed files:
  1. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (281 changed lines)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (17 changed lines)
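The commit replaces the old grid-based under-approximation helper with a naive unrolling of the reachable belief support: beliefs are explored breadth-first, every visited belief becomes an MDP state, and once the budget of fresh states is exhausted, all remaining frontier beliefs are redirected to an absorbing sink (state 0), with state 1 acting as the absorbing goal. The following is a minimal sketch of that loop under simplifying assumptions; `Belief`, `unrollBeliefSupport`, and the three `std::function` helpers are hypothetical stand-ins for the Storm data structures and for the `computeObservationProbabilitiesAfterAction` / `getBeliefAfterActionAndObservation` helpers used in the actual `computeUnderapproximation`, not Storm's API.

```cpp
#include <cstdint>
#include <deque>
#include <functional>
#include <map>
#include <set>
#include <vector>

// Hypothetical, simplified stand-in for storm::pomdp::Belief.
struct Belief {
    uint32_t observation;
    std::vector<double> probabilities; // distribution over POMDP states
};

// Sketch of the bounded belief-support unrolling. Rows 0 and 1 are the
// absorbing sink and goal; every discovered belief gets the next state id in
// discovery order, which (for this FIFO queue) equals the order in which its
// row is pushed, so row index == state id. Target states would additionally
// be collected for the "target" label, as in the committed code.
std::vector<std::vector<std::map<uint64_t, double>>> unrollBeliefSupport(
        std::vector<Belief> const &beliefList,
        std::vector<bool> const &beliefIsTarget,
        uint64_t initialBeliefId,
        uint64_t maxModelSize,
        // hypothetical helpers, assumed to mirror the ones called in the diff
        std::function<std::map<uint32_t, double>(Belief const &, uint64_t)> obsProbsAfterAction,
        std::function<uint64_t(Belief const &, uint64_t, uint32_t)> beliefAfterActionAndObs,
        std::function<uint64_t(Belief const &)> numChoices) {
    // one row group per state, one map (distribution) per action
    std::vector<std::vector<std::map<uint64_t, double>>> transitions = {
        {{{0, 1.0}}},   // state 0: sink self-loop
        {{{1, 1.0}}}};  // state 1: goal self-loop
    std::map<uint64_t, uint64_t> beliefStateMap;
    std::set<uint64_t> visited;
    std::deque<uint64_t> toExpand;

    uint64_t stateId = 2;
    uint64_t counter = 0; // number of freshly created belief states
    beliefStateMap[initialBeliefId] = stateId++;
    visited.insert(initialBeliefId);
    toExpand.push_back(initialBeliefId);

    while (!toExpand.empty()) {
        uint64_t currentId = toExpand.front();
        toExpand.pop_front();
        if (beliefIsTarget[currentId]) {
            // target beliefs only get a single self-loop
            transitions.push_back({{{beliefStateMap[currentId], 1.0}}});
        } else if (counter > maxModelSize) {
            // size budget exhausted: send the belief to the sink instead of expanding it
            transitions.push_back({{{0, 1.0}}});
        } else {
            std::vector<std::map<uint64_t, double>> actionTransitions;
            for (uint64_t action = 0; action < numChoices(beliefList[currentId]); ++action) {
                std::map<uint64_t, double> row;
                for (auto const &[obs, prob] : obsProbsAfterAction(beliefList[currentId], action)) {
                    uint64_t nextBeliefId = beliefAfterActionAndObs(beliefList[currentId], action, obs);
                    if (visited.insert(nextBeliefId).second) {
                        beliefStateMap[nextBeliefId] = stateId++;
                        toExpand.push_back(nextBeliefId);
                        ++counter;
                    }
                    row[beliefStateMap[nextBeliefId]] = prob;
                }
                actionTransitions.push_back(std::move(row));
            }
            transitions.push_back(std::move(actionTransitions));
        }
    }
    return transitions;
}
```

In the committed code, the resulting transition structure is turned into a sparse MDP, the goal state is labelled "target", and the sparse engine checks the corresponding Pmin/Pmax (or Rmin/Rmax) reachability property; the value of the initial belief's state is returned as the under-approximation.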

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (281 changed lines)

@@ -88,9 +88,10 @@ namespace storm {
std::vector<uint64_t> observationResolutionVector(pomdp.getNrObservations(), gridResolution);
auto overRes = storm::utility::one<ValueType>();
auto underRes = storm::utility::zero<ValueType>();
uint64_t underApproxModelSize = 50;
uint64_t refinementCounter = 1;
std::unique_ptr<POMDPCheckResult<ValueType>> res = computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold,
overApproxMap, underApproxMap);
overApproxMap, underApproxMap, underApproxModelSize);
// TODO the actual refinement
return res;
}
@@ -102,10 +103,11 @@ namespace storm {
std::vector<uint64_t> &observationResolutionVector,
bool computeRewards, double explorationThreshold,
boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap) {
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
uint64_t maxUaModelSize) {
STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
auto result = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, computeRewards, explorationThreshold, overApproximationMap,
underApproximationMap);
underApproximationMap, maxUaModelSize);
return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{result->overApproxValue, result->underApproxValue});
}
@@ -117,7 +119,8 @@ namespace storm {
std::vector<uint64_t> &observationResolutionVector,
bool computeRewards, double explorationThreshold,
boost::optional<std::map<uint64_t, ValueType>> overApproximationMap,
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap) {
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap,
uint64_t maxUaModelSize) {
bool boundMapsSet = overApproximationMap && underApproximationMap;
std::map<uint64_t, ValueType> overMap;
std::map<uint64_t, ValueType> underMap;
@@ -413,183 +416,8 @@ namespace storm {
auto overApprox = overApproxResultMap[beliefStateMap.left.at(initialBelief.id)];
STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl)
// Prototypical implementation of the underapproximation - WRONG
/*
// The map has the following form: (beliefId, action) --> stateId
uamap_type uaStateMap;
// Reserve states 0 and 1 as always sink/goal states
std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> uaTransitions = {{0,{{{0, storm::utility::one<ValueType>()}}}},
{1,{{{1, storm::utility::one<ValueType>()}}}}};
// Hint vector for the MDP modelchecker (initialize with constant sink/goal values)
std::vector<uint64_t> uaTargetStates = {1};
uint64_t uaStateId = 2;
// for beliefs which are both in the actual belief support and the grid, we use the max value for the action to indicate the support belief
uaStateMap.insert(uamap_type::value_type(std::make_pair(initialBelief.id, std::numeric_limits<uint64_t>::max()), uaStateId));
++uaStateId;
beliefsToBeExpanded.push_back(initialBelief.id);
while(!beliefsToBeExpanded.empty()){
uint64_t currId = beliefsToBeExpanded.front();
beliefsToBeExpanded.pop_front();
bool isTarget = beliefIsTarget[currId];
if(isTarget){
// For target states we add a self-loop
uaTargetStates.push_back(uaStateMap.left.at(std::make_pair(currId, std::numeric_limits<uint64_t>::max())));
uaTransitions[uaStateMap.left.at(std::make_pair(currId, std::numeric_limits<uint64_t>::max()))] = {{{uaStateMap.left.at(std::make_pair(currId, std::numeric_limits<uint64_t>::max())), storm::utility::one<ValueType>()}}};
} else {
uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currId].observation).front());
//Triangulate the current belief to determine its approximation bases
std::vector<std::vector<ValueType>> subSimplex;
std::vector<ValueType> lambdas;
if (cacheSubsimplices && subSimplexCache.count(currId) > 0) {
subSimplex = subSimplexCache[currId];
lambdas = lambdaCache[currId];
} else {
std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
beliefList[currId].probabilities, observationResolutionVector[beliefList[currId].observation]);
subSimplex = temp.first;
lambdas = temp.second;
if(cacheSubsimplices){
subSimplexCache[currId] = subSimplex;
lambdaCache[currId] = lambdas;
}
}
std::deque<std::pair<uint64_t,uint64_t>> approxToExpand;
std::vector<std::map<uint64_t, ValueType>> approxActionTransitions(numChoices);
for (size_t j = 0; j < lambdas.size(); ++j) {
if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())){
uint64_t approxId = getBeliefIdInVector(beliefGrid, beliefList[currId].observation, subSimplex[j]);
//STORM_PRINT("ApproxId " << approxId << std::endl)
if (approxId == uint64_t(-1)) {
// If the approximation base is not yet in the grid, we add it and it has to be expanded
storm::pomdp::Belief<ValueType> gridBelief = {nextId, beliefList[currId].observation, subSimplex[j]};
beliefList.push_back(gridBelief);
beliefGrid.push_back(gridBelief);
beliefIsTarget.push_back(targetObservations.find(beliefList[currId].observation) != targetObservations.end());
for(uint64_t action=0; action < numChoices; ++action) {
approxToExpand.push_back(std::make_pair(nextId, action));
uaStateMap.insert(uamap_type::value_type(std::make_pair(nextId, action), uaStateId));
approxActionTransitions[action][uaStateId] = lambdas[j];
++uaStateId;
}
++nextId;
} else if(uaStateMap.left.find(std::pair<uint64_t,uint64_t>(approxId,0)) != uaStateMap.left.end()){
// we can check only for (approxId,0) as that always exists if the grid state is mapped
for(uint64_t action=0; action < numChoices; ++action) {
approxActionTransitions[action][uaStateMap.left.at(std::make_pair(approxId,action))] = lambdas[j];
}
} else {
for(uint64_t action=0; action < numChoices; ++action) {
approxToExpand.push_back(std::make_pair(approxId, action));
uaStateMap.insert(uamap_type::value_type(std::make_pair(approxId, action), uaStateId));
approxActionTransitions[action][uaStateId] = lambdas[j];
++uaStateId;
}
}
}
}
uaTransitions[uaStateMap.left.at(std::make_pair(currId,std::numeric_limits<uint64_t>::max()))] = approxActionTransitions;
// Now expand all approximation bases
while(!approxToExpand.empty()){
uint64_t approxId = approxToExpand.front().first;
uint64_t approxAction = approxToExpand.front().second;
approxToExpand.pop_front();
// Iterate over all actions and determine the successor states
std::map<uint64_t, ValueType> transitionsInAction;
std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[approxId], approxAction);
for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
uint32_t observation = iter->first;
uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[approxId], approxAction,
observation, nextId);
nextId = beliefList.size();
if(uaStateMap.left.find(std::make_pair(idNextBelief, std::numeric_limits<uint64_t>::max())) == uaStateMap.left.end()){
// add state to the mapping and set it t be expanded
uaStateMap.insert(uamap_type::value_type(std::make_pair(idNextBelief,std::numeric_limits<uint64_t>::max()), uaStateId));
++uaStateId;
beliefsToBeExpanded.push_back(idNextBelief);
}
transitionsInAction[uaStateMap.left.at(std::make_pair(idNextBelief,std::numeric_limits<uint64_t>::max()))] = iter->second;
}
uaTransitions[uaStateMap.left.at(std::make_pair(approxId,approxAction))] = {transitionsInAction};
}
}
}
std::vector<std::vector<std::map<uint64_t, ValueType>>> uaTransitionVector;
for(auto iter = uaTransitions.begin(); iter != uaTransitions.end(); ++iter){
uaTransitionVector.push_back(iter->second);
}
STORM_PRINT(uaTransitions.size() << std::endl)
storm::models::sparse::StateLabeling uaLabeling(uaTransitions.size());
uaLabeling.addLabel("init");
uaLabeling.addLabel("target");
uaLabeling.addLabel("belief");
uaLabeling.addLabel("grid");
uaLabeling.addLabelToState("init", uaStateMap.left.at(std::make_pair(initialBelief.id,std::numeric_limits<uint64_t>::max())));
for (auto targetState : uaTargetStates) {
uaLabeling.addLabelToState("target", targetState);
}
for (auto &iter : uaStateMap.right) {
std::stringstream mapEntryStr;
mapEntryStr << std::to_string(iter.first);
mapEntryStr << " --> ";
mapEntryStr << "[{" + std::to_string(beliefList[iter.second.first].observation) << "} | " ;
for(uint64_t state = 0; state < beliefList[iter.second.first].probabilities.size(); ++state){
if(beliefList[iter.second.first].probabilities[state] > storm::utility::zero<ValueType>()){
mapEntryStr << std::to_string(state) << " : " << beliefList[iter.second.first].probabilities[state] << ", ";
}
}
mapEntryStr << "]" << std::endl;
STORM_PRINT(mapEntryStr.str());
if(!uaLabeling.containsLabel(mapEntryStr.str())){
uaLabeling.addLabel(mapEntryStr.str());
}
if(iter.second.second == std::numeric_limits<uint64_t>::max()){
uaLabeling.addLabelToState("belief", iter.first);
} else {
uaLabeling.addLabelToState("grid", iter.first);
}
uaLabeling.addLabelToState(mapEntryStr.str(), iter.first);
}
//STORM_PRINT(buildTransitionMatrix(uaTransitionVector))
storm::storage::sparse::ModelComponents<ValueType, RewardModelType> uaModelComponents(buildTransitionMatrix(uaTransitionVector), uaLabeling);
storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(uaModelComponents);
if (computeRewards) {
storm::models::sparse::StandardRewardModel<ValueType> uaMdpRewardModel(boost::none, std::vector<ValueType>(uaModelComponents.transitionMatrix.getRowCount()));
for (auto const &iter : uaStateMap.left) {
auto currentBelief = beliefList[iter.first.first];
auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) {
// Add the reward
uaMdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
currentBelief));
}
}
underApproxMdp.addRewardModel("std", uaMdpRewardModel);
underApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
}
underApproxMdp.printModelInformationToStream(std::cout);
auto uaModel = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underApproxMdp);
auto uaModelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(uaModel);
storm::api::exportSparseModelAsDot(uaModelPtr, "ua_model.dot");
auto uaTask = storm::api::createTask<ValueType>(property, false);
storm::utility::Stopwatch underApproxTimer(true);
std::unique_ptr<storm::modelchecker::CheckResult> uaRes(storm::api::verifyWithSparseEngine<ValueType>(uaModelPtr, uaTask));
underApproxTimer.stop();
STORM_LOG_ASSERT(uaRes, "Result not exist.");
uaRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true)));
auto underApproxResultMap = uaRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
auto underApprox = underApproxResultMap[uaStateMap.left.at(std::make_pair(initialBelief.id, std::numeric_limits<uint64_t>::max()))];
*/
auto underApprox = weightedSumUnderMap[initialBelief.id];
//auto underApprox = weightedSumUnderMap[initialBelief.id];
auto underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize);
STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
@@ -837,10 +665,10 @@ namespace storm {
// Now onto the under-approximation
storm::utility::Stopwatch underApproxTimer(true);
ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp);
/*ValueType underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
result, chosenActions, gridResolution, initialBelief.id, min, computeRewards, useMdp);*/
underApproxTimer.stop();
auto underApprox = storm::utility::zero<ValueType>();
STORM_PRINT("Time Belief Grid Generation: " << beliefGridTimer << std::endl
<< "Time Overapproximation: " << overApproxTimer
<< std::endl
@@ -874,64 +702,60 @@ namespace storm {
std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<bool> &beliefIsTarget,
std::set<uint32_t> const &targetObservations,
std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
std::map<uint64_t, ValueType> &result,
std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
uint64_t gridResolution, uint64_t initialBeliefId, bool min,
bool computeRewards, bool generateMdp) {
uint64_t initialBeliefId, bool min,
bool computeRewards, uint64_t maxModelSize) {
std::set<uint64_t> visitedBelieves;
std::deque<uint64_t> believesToBeExpanded;
std::map<uint64_t, uint64_t> beliefStateMap;
std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions;
std::vector<uint64_t> targetStates;
std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions = {{{{0, storm::utility::one<ValueType>()}}},
{{{1, storm::utility::one<ValueType>()}}}};
std::vector<uint64_t> targetStates = {1};
uint64_t stateId = 0;
uint64_t stateId = 2;
beliefStateMap[initialBeliefId] = stateId;
++stateId;
uint64_t nextId = beliefList.size();
uint64_t counter = 0;
// Expand the believes
visitedBelieves.insert(initialBeliefId);
believesToBeExpanded.push_back(initialBeliefId);
while (!believesToBeExpanded.empty()) {
//TODO think of other ways to stop exploration besides model size
auto currentBeliefId = believesToBeExpanded.front();
std::vector<std::map<uint64_t, ValueType>> actionTransitionStorage;
uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front());
// for targets, we only consider one action with one transition
if (beliefIsTarget[currentBeliefId]) {
// add a self-loop to target states and save them
std::map<uint64_t, ValueType> transitionsInStateWithAction;
transitionsInStateWithAction[beliefStateMap[currentBeliefId]] = storm::utility::one<ValueType>();
// add a self-loop to target states
targetStates.push_back(beliefStateMap[currentBeliefId]);
actionTransitionStorage.push_back(transitionsInStateWithAction);
transitions.push_back({{{beliefStateMap[currentBeliefId], storm::utility::one<ValueType>()}}});
} else if (counter > maxModelSize) {
transitions.push_back({{{0, storm::utility::one<ValueType>()}}});
} else {
if (chosenActions.find(currentBeliefId) == chosenActions.end()) {
chosenActions[currentBeliefId] = generateMdp ? extractBestActions(pomdp, beliefList, beliefIsTarget, targetObservations,
observationProbabilities,
nextBelieves, result, gridResolution,
currentBeliefId, beliefList.size(), min) :
extractBestAction(pomdp, beliefList, beliefIsTarget, targetObservations,
observationProbabilities,
nextBelieves, result, gridResolution,
currentBeliefId, beliefList.size(), min);
}
// Iterate over all actions and add the corresponding transitions
for (auto const &action : chosenActions[currentBeliefId]) {
std::vector<std::map<uint64_t, ValueType>> actionTransitionStorage;
//TODO add a way to extract the actions from the over-approx and use them here?
for (uint64_t action = 0; action < numChoices; ++action) {
std::map<uint64_t, ValueType> transitionsInStateWithAction;
for (auto iter = observationProbabilities[currentBeliefId][action].begin(); iter != observationProbabilities[currentBeliefId][action].end(); ++iter) {
std::map<uint32_t, ValueType> observationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currentBeliefId], action);
for (auto iter = observationProbabilities.begin(); iter != observationProbabilities.end(); ++iter) {
uint32_t observation = iter->first;
uint64_t nextBeliefId = nextBelieves[currentBeliefId][action][observation];
uint64_t nextBeliefId = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currentBeliefId],
action,
observation, nextId);
nextId = beliefList.size();
if (visitedBelieves.insert(nextBeliefId).second) {
beliefStateMap[nextBeliefId] = stateId;
++stateId;
believesToBeExpanded.push_back(nextBeliefId);
++counter;
}
transitionsInStateWithAction[beliefStateMap[nextBeliefId]] = iter->second;
}
actionTransitionStorage.push_back(transitionsInStateWithAction);
}
}
transitions.push_back(actionTransitionStorage);
}
believesToBeExpanded.pop_front();
}
@@ -949,24 +773,6 @@ namespace storm {
transitionMatrix.makeRowGroupingTrivial();
}
storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(transitionMatrix, labeling);
if (transitionMatrix.hasTrivialRowGrouping()) {
storm::models::sparse::Dtmc<ValueType, RewardModelType> underApproxMc(modelComponents);
storm::models::sparse::StandardRewardModel<ValueType> rewardModel(std::vector<ValueType>(beliefStateMap.size()));
if (computeRewards) {
for (auto const &iter : beliefStateMap) {
auto currentBelief = beliefList[iter.first];
// Add the reward collected by taking the chosen Action in the belief
rewardModel.setStateReward(iter.second, getRewardAfterAction(pomdp, pomdp.getChoiceIndex(
storm::storage::StateActionPair(pomdp.getStatesWithObservation(currentBelief.observation).front(), chosenActions[iter.first][0])),
currentBelief));
}
}
underApproxMc.addRewardModel("std", rewardModel);
underApproxMc.restrictRewardModels(std::set<std::string>({"std"}));
model = std::make_shared<storm::models::sparse::Dtmc<ValueType, RewardModelType>>(underApproxMc);
} else {
storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(modelComponents);
if (computeRewards) {
storm::models::sparse::StandardRewardModel<ValueType> rewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
@@ -984,9 +790,11 @@ namespace storm {
underApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
}
model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underApproxMdp);
}
model->printModelInformationToStream(std::cout);
storm::api::exportSparseModelAsDot(model, "ua_model.dot");
std::string propertyString;
if (computeRewards) {
propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
@@ -996,10 +804,13 @@ namespace storm {
std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, true)));
std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, false)));
STORM_LOG_ASSERT(res, "Result does not exist.");
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates()));
return res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap().begin()->second;
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true)));
auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
auto underApprox = underApproxResultMap[beliefStateMap[initialBeliefId]];
return underApprox;
}

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (17 changed lines)

@@ -80,7 +80,7 @@ namespace storm {
computeFirstRefinementStep(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::set<uint32_t> const &targetObservations, bool min, std::vector<uint64_t> &observationResolutionVector,
bool computeRewards, double explorationThreshold, boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none);
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
/**
*
@@ -96,7 +96,7 @@ namespace storm {
std::set<uint32_t> const &targetObservations, bool min,
std::vector<uint64_t> &observationResolutionVector, bool computeRewards, double explorationThreshold,
boost::optional<std::map<uint64_t, ValueType>> overApproximationMap = boost::none,
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none);
boost::optional<std::map<uint64_t, ValueType>> underApproximationMap = boost::none, uint64_t maxUaModelSize = 200);
/**
*
@@ -164,24 +164,17 @@ namespace storm {
* @param beliefList
* @param beliefIsTarget
* @param targetObservations
* @param observationProbabilities
* @param nextBelieves
* @param result
* @param chosenActions
* @param gridResolution
* @param initialBeliefId
* @param min
* @param computeReward
* @param maxModelSize
* @return
*/
ValueType computeUnderapproximation(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<bool> &beliefIsTarget,
std::set<uint32_t> const &targetObservations,
std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
std::map<uint64_t, ValueType> &result,
std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
uint64_t gridResolution, uint64_t initialBeliefId, bool min, bool computeReward, bool generateMdp);
uint64_t initialBeliefId, bool min, bool computeReward, uint64_t maxModelSize);
/**
*
