
Rudimentary version of the refinement loop

tempestpy_adaptions
Alexander Bork 5 years ago
parent commit e02ea57a58
  1. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (112 lines changed)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (30 lines changed)
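The refinement loop added below works on the gap between the over- and the under-approximation: every belief explored for the under-approximation contributes the difference between its over-approximate and under-approximate value to its observation, the accumulated differences are averaged per observation, and the belief-grid resolution of every observation attaining the maximal average score is doubled before the next round (the target size of the under-approximation model also grows by 10 states per round). A minimal standalone sketch of that scoring step, with plain double and std::vector standing in for ValueType and the storm containers, and purely hypothetical numbers:

// Standalone sketch of the refinement scoring heuristic used in the loop below.
// Hypothetical data; plain double instead of ValueType.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    // Accumulated over-/under-approximation gaps and number of explored beliefs per observation.
    std::vector<double> obsAccumulator = {0.4, 1.2, 0.0};
    std::vector<uint64_t> beliefCount = {2, 3, 1};
    // Current belief-grid resolution per observation.
    std::vector<uint64_t> observationResolutionVector = {2, 2, 2};

    // Score each observation by its average gap.
    for (std::size_t i = 0; i < obsAccumulator.size(); ++i) {
        obsAccumulator[i] /= static_cast<double>(beliefCount[i]);
    }
    double maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end());

    // Refine: double the resolution of every observation attaining the maximal score.
    for (std::size_t i = 0; i < observationResolutionVector.size(); ++i) {
        if (obsAccumulator[i] == maxAvgDifference) {
            observationResolutionVector[i] *= 2;
        }
    }
    std::cout << "Max score: " << maxAvgDifference << '\n';           // 0.4 for this data
    std::cout << "New resolutions: " << observationResolutionVector[0] << ' '
              << observationResolutionVector[1] << ' '
              << observationResolutionVector[2] << '\n';              // 2 4 2
    return 0;
}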

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (112 lines changed)

@@ -56,7 +56,7 @@ namespace storm {
std::unique_ptr<storm::modelchecker::CheckResult> underlyingRes(storm::api::verifyWithSparseEngine<ValueType>(underlyingModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
STORM_LOG_ASSERT(underlyingRes, "Result does not exist.");
underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true)));
auto overApproxMap = underlyingRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
underlyingWatch.stop();
storm::utility::Stopwatch positionalWatch(true);
@@ -80,6 +80,8 @@ namespace storm {
auto underApproxMap = underapproxRes->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
positionalWatch.stop();
STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // "
<< underApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl)
STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl)
// Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution.
@@ -90,10 +92,65 @@ namespace storm {
auto underRes = storm::utility::zero<ValueType>();
uint64_t underApproxModelSize = 50;
uint64_t refinementCounter = 1;
std::unique_ptr<POMDPCheckResult<ValueType>> res = computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold,
overApproxMap, underApproxMap, underApproxModelSize);
// TODO the actual refinement
return res;
std::unique_ptr<RefinementComponents<ValueType>> res;
while (refinementCounter < 30) {
res = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold,
initialOverApproxMap, underApproxMap, underApproxModelSize);
// TODO the actual refinement
// choose which observation(s) to refine
std::vector<ValueType> obsAccumulator(pomdp.getNrObservations(), storm::utility::zero<ValueType>());
std::vector<uint64_t> beliefCount(pomdp.getNrObservations(), 0);
bsmap_type::right_map::const_iterator iter = res->underApproxBeliefStateMap.right.begin();
while (iter != res->underApproxBeliefStateMap.right.end()) {
auto currentBelief = res->beliefList[iter->second];
beliefCount[currentBelief.observation] += 1;
//TODO rename, this is getting confusing
bsmap_type::left_const_iterator it = res->overApproxBeliefStateMap.left.find(iter->second);
if (it != res->overApproxBeliefStateMap.left.end()) {
// If there is an over-approximate value for the belief, use it
auto diff = res->overApproxMap[it->second] - res->underApproxMap[iter->first];
obsAccumulator[currentBelief.observation] += diff;
} else {
//otherwise, we approximate a value TODO this is critical, we have to think about it
auto overApproxValue = storm::utility::zero<ValueType>();
auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation]);
auto subSimplex = temp.first;
auto lambdas = temp.second;
for (size_t j = 0; j < lambdas.size(); ++j) {
if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
uint64_t approxId = getBeliefIdInVector(res->beliefList, currentBelief.observation, subSimplex[j]);
bsmap_type::left_const_iterator approxIter = res->overApproxBeliefStateMap.left.find(approxId);
if (approxIter != res->overApproxBeliefStateMap.left.end()) {
overApproxValue += lambdas[j] * res->overApproxMap[approxIter->second];
} else {
overApproxValue += lambdas[j];
}
}
}
obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[iter->first];
}
++iter;
}
for (uint64_t i = 0; i < obsAccumulator.size(); ++i) {
obsAccumulator[i] /= beliefCount[i];
}
//TODO think about some other scoring methods
auto maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end());
STORM_PRINT("Max Score: " << maxAvgDifference << std::endl)
STORM_PRINT(" Obs | Score " << std::endl << "---------|---------" << std::endl)
for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) {
STORM_PRINT(i << " |" << obsAccumulator[i] << std::endl)
if (obsAccumulator[i] == maxAvgDifference) {
observationResolutionVector[i] *= 2;
}
}
underApproxModelSize += 10;
++refinementCounter;
}
return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{res->overApproxValue, res->underApproxValue});
}
template<typename ValueType, typename RewardModelType>
@@ -417,21 +474,22 @@ namespace storm {
STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl)
//auto underApprox = weightedSumUnderMap[initialBelief.id];
auto underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize);
auto underApproxComponents = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards,
maxUaModelSize);
STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
// Transfer the underapproximation results from the belief id space to the MDP id space
std::map<uint64_t, ValueType> underApproxResultMap = {{0, storm::utility::zero<ValueType>()},
{1, storm::utility::one<ValueType>()}};
for (auto const &belief : beliefGrid) {
underApproxResultMap[beliefStateMap.left.at(belief.id)] = weightedSumUnderMap[belief.id];
}
STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl);
return std::make_unique<RefinementComponents<ValueType>>(
RefinementComponents<ValueType>{modelPtr, overApprox, underApprox, overApproxResultMap, underApproxResultMap, beliefList, beliefIsTarget, beliefStateMap});
RefinementComponents<ValueType>{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap,
underApproxComponents->underApproxMap, beliefList, beliefIsTarget, beliefStateMap,
underApproxComponents->underApproxBeliefStateMap});
}
/*
template<typename ValueType, typename RewardModelType>
std::unique_ptr<RefinementComponents<ValueType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeRefinementStep(){}*/
template<typename ValueType, typename RewardModelType>
ValueType
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::overApproximationValueIteration(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
@@ -499,8 +557,8 @@ namespace storm {
}
// Update the selected actions
if ((min && cc.isLess(storm::utility::zero<ValueType>(), chosenValue - currentValue)) ||
(!min && cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
(!min && cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
chosenValue = currentValue;
if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
chosenActionIndices.clear();
@@ -697,7 +755,7 @@ namespace storm {
}
template<typename ValueType, typename RewardModelType>
ValueType
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>>
ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeUnderapproximation(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<bool> &beliefIsTarget,
@@ -706,13 +764,13 @@ namespace storm {
bool computeRewards, uint64_t maxModelSize) {
std::set<uint64_t> visitedBelieves;
std::deque<uint64_t> believesToBeExpanded;
std::map<uint64_t, uint64_t> beliefStateMap;
bsmap_type beliefStateMap;
std::vector<std::vector<std::map<uint64_t, ValueType>>> transitions = {{{{0, storm::utility::one<ValueType>()}}},
{{{1, storm::utility::one<ValueType>()}}}};
std::vector<uint64_t> targetStates = {1};
uint64_t stateId = 2;
beliefStateMap[initialBeliefId] = stateId;
beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, stateId));
++stateId;
uint64_t nextId = beliefList.size();
uint64_t counter = 0;
@@ -727,8 +785,8 @@ namespace storm {
// for targets, we only consider one action with one transition
if (beliefIsTarget[currentBeliefId]) {
// add a self-loop to target states
targetStates.push_back(beliefStateMap[currentBeliefId]);
transitions.push_back({{{beliefStateMap[currentBeliefId], storm::utility::one<ValueType>()}}});
targetStates.push_back(beliefStateMap.left.at(currentBeliefId));
transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one<ValueType>()}}});
} else if (counter > maxModelSize) {
transitions.push_back({{{0, storm::utility::one<ValueType>()}}});
} else {
@@ -745,12 +803,12 @@ namespace storm {
observation, nextId);
nextId = beliefList.size();
if (visitedBelieves.insert(nextBeliefId).second) {
beliefStateMap[nextBeliefId] = stateId;
beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId));
++stateId;
believesToBeExpanded.push_back(nextBeliefId);
++counter;
}
transitionsInStateWithAction[beliefStateMap[nextBeliefId]] = iter->second;
transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second;
}
actionTransitionStorage.push_back(transitionsInStateWithAction);
}
@@ -776,7 +834,7 @@ namespace storm {
storm::models::sparse::Mdp<ValueType, RewardModelType> underApproxMdp(modelComponents);
if (computeRewards) {
storm::models::sparse::StandardRewardModel<ValueType> rewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
for (auto const &iter : beliefStateMap) {
for (auto const &iter : beliefStateMap.left) {
auto currentBelief = beliefList[iter.first];
auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) {
@@ -808,9 +866,9 @@ namespace storm {
STORM_LOG_ASSERT(res, "Result does not exist.");
res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true)));
auto underApproxResultMap = res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap();
auto underApprox = underApproxResultMap[beliefStateMap[initialBeliefId]];
auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)];
return underApprox;
return std::make_unique<UnderApproxComponents<ValueType>>(UnderApproxComponents<ValueType>{underApprox, underApproxResultMap, beliefStateMap});
}

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (30 lines changed)

@@ -11,7 +11,6 @@ namespace storm {
namespace pomdp {
namespace modelchecker {
typedef boost::bimap<uint64_t, uint64_t> bsmap_type;
typedef boost::bimap<std::pair<uint64_t, uint64_t>, uint64_t> uamap_type;
template<class ValueType>
struct POMDPCheckResult {
@@ -28,11 +27,19 @@ namespace storm {
std::shared_ptr<storm::models::sparse::Model<ValueType, RewardModelType>> overApproxModelPtr;
ValueType overApproxValue;
ValueType underApproxValue;
std::map<uint64_t, ValueType> &overApproxMap;
std::map<uint64_t, ValueType> &underApproxMap;
std::vector<storm::pomdp::Belief<ValueType>> &beliefList;
std::vector<bool> &beliefIsTarget;
bsmap_type &beliefStateMap;
std::map<uint64_t, ValueType> overApproxMap;
std::map<uint64_t, ValueType> underApproxMap;
std::vector<storm::pomdp::Belief<ValueType>> beliefList;
std::vector<bool> beliefIsTarget;
bsmap_type overApproxBeliefStateMap;
bsmap_type underApproxBeliefStateMap;
};
template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
struct UnderApproxComponents {
ValueType underApproxValue;
std::map<uint64_t, ValueType> underApproxMap;
bsmap_type underApproxBeliefStateMap;
};
template<class ValueType, typename RewardModelType = models::sparse::StandardRewardModel<ValueType>>
@@ -170,11 +177,12 @@ namespace storm {
* @param maxModelSize
* @return
*/
ValueType computeUnderapproximation(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<bool> &beliefIsTarget,
std::set<uint32_t> const &targetObservations,
uint64_t initialBeliefId, bool min, bool computeReward, uint64_t maxModelSize);
std::unique_ptr<UnderApproxComponents<ValueType, RewardModelType>> computeUnderapproximation(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
std::vector<bool> &beliefIsTarget,
std::set<uint32_t> const &targetObservations,
uint64_t initialBeliefId, bool min, bool computeReward,
uint64_t maxModelSize);
/**
*

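With this commit the under-approximation's belief-to-state map uses the same bsmap_type (a boost::bimap<uint64_t, uint64_t>) as the over-approximation instead of a plain std::map, so the mapping can be read in both directions: belief id to MDP state through the .left view, MDP state back to belief id through the .right view, which the refinement loop relies on when matching under- and over-approximation states. A small self-contained sketch of the three bimap operations that appear in the diff (insert, left.at, iterating the right view), with made-up ids:

// Minimal boost::bimap usage sketch mirroring the calls in the diff (made-up ids).
#include <boost/bimap.hpp>
#include <cstdint>
#include <iostream>

typedef boost::bimap<uint64_t, uint64_t> bsmap_type;

int main() {
    bsmap_type beliefStateMap;
    // Insert: belief id 7 is represented by MDP state 2
    // (cf. beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId))).
    beliefStateMap.insert(bsmap_type::value_type(7, 2));
    beliefStateMap.insert(bsmap_type::value_type(9, 3));

    // Forward lookup, belief id -> MDP state (cf. beliefStateMap.left.at(currentBeliefId)).
    std::cout << "belief 7 -> state " << beliefStateMap.left.at(7) << '\n';   // 2

    // Reverse iteration, MDP state -> belief id
    // (cf. res->underApproxBeliefStateMap.right.begin() in the refinement loop).
    for (bsmap_type::right_map::const_iterator it = beliefStateMap.right.begin();
         it != beliefStateMap.right.end(); ++it) {
        std::cout << "state " << it->first << " -> belief " << it->second << '\n';
    }
    return 0;
}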