From e02ea57a58772057925542b53237d36790493553 Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Fri, 24 Jan 2020 17:08:46 +0100 Subject: [PATCH] Rudimentary version of the refinement loop --- .../ApproximatePOMDPModelchecker.cpp | 112 +++++++++++++----- .../ApproximatePOMDPModelchecker.h | 30 +++-- 2 files changed, 104 insertions(+), 38 deletions(-) diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 84c055d1f..ab14f7349 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -56,7 +56,7 @@ namespace storm { std::unique_ptr underlyingRes(storm::api::verifyWithSparseEngine(underlyingModel, storm::api::createTask(underlyingProperty, false))); STORM_LOG_ASSERT(underlyingRes, "Result not exist."); underlyingRes->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underlyingMdp.getNumberOfStates(), true))); - auto overApproxMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); + auto initialOverApproxMap = underlyingRes->asExplicitQuantitativeCheckResult().getValueMap(); underlyingWatch.stop(); storm::utility::Stopwatch positionalWatch(true); @@ -80,6 +80,8 @@ namespace storm { auto underApproxMap = underapproxRes->asExplicitQuantitativeCheckResult().getValueMap(); positionalWatch.stop(); + STORM_PRINT("Pre-Processing Results: " << initialOverApproxMap[underlyingMdp.getInitialStates().getNextSetIndex(0)] << " // " + << underApproxMap[underApproxModel->getInitialStates().getNextSetIndex(0)] << std::endl) STORM_PRINT("Preprocessing Times: " << underlyingWatch << " / " << positionalWatch << std::endl) // Initialize the resolution mapping. For now, we always give all beliefs with the same observation the same resolution. @@ -90,10 +92,65 @@ namespace storm { auto underRes = storm::utility::zero(); uint64_t underApproxModelSize = 50; uint64_t refinementCounter = 1; - std::unique_ptr> res = computeReachabilityOTF(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold, - overApproxMap, underApproxMap, underApproxModelSize); - // TODO the actual refinement - return res; + std::unique_ptr> res; + while (refinementCounter < 30) { + res = computeFirstRefinementStep(pomdp, targetObservations, min, observationResolutionVector, false, explorationThreshold, + initialOverApproxMap, underApproxMap, underApproxModelSize); + // TODO the actual refinement + // choose which observation(s) to refine + std::vector obsAccumulator(pomdp.getNrObservations(), storm::utility::zero()); + std::vector beliefCount(pomdp.getNrObservations(), 0); + bsmap_type::right_map::const_iterator iter = res->underApproxBeliefStateMap.right.begin(); + while (iter != res->underApproxBeliefStateMap.right.end()) { + auto currentBelief = res->beliefList[iter->second]; + beliefCount[currentBelief.observation] += 1; + //TODO rename, this is getting confusing + bsmap_type::left_const_iterator it = res->overApproxBeliefStateMap.left.find(iter->second); + if (it != res->overApproxBeliefStateMap.left.end()) { + // If there is an over-approximate value for the belief, use it + auto diff = res->overApproxMap[it->second] - res->underApproxMap[iter->first]; + obsAccumulator[currentBelief.observation] += diff; + } else { + //otherwise, we approximate a value TODO this is critical, we have to think about it + auto overApproxValue = storm::utility::zero(); + auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation]); + auto subSimplex = temp.first; + auto lambdas = temp.second; + for (size_t j = 0; j < lambdas.size(); ++j) { + if (!cc.isEqual(lambdas[j], storm::utility::zero())) { + uint64_t approxId = getBeliefIdInVector(res->beliefList, currentBelief.observation, subSimplex[j]); + bsmap_type::left_const_iterator approxIter = res->overApproxBeliefStateMap.left.find(approxId); + if (approxIter != res->overApproxBeliefStateMap.left.end()) { + overApproxValue += lambdas[j] * res->overApproxMap[approxIter->second]; + } else { + overApproxValue += lambdas[j]; + } + } + } + obsAccumulator[currentBelief.observation] += overApproxValue - res->underApproxMap[iter->first]; + } + ++iter; + } + + for (uint64_t i = 0; i < obsAccumulator.size(); ++i) { + obsAccumulator[i] /= beliefCount[i]; + } + + //TODO think about some other scoring methods + auto maxAvgDifference = *std::max_element(obsAccumulator.begin(), obsAccumulator.end()); + STORM_PRINT("Max Score: " << maxAvgDifference << std::endl) + STORM_PRINT(" Obs | Score " << std::endl << "---------|---------" << std::endl) + for (uint64_t i = 0; i < pomdp.getNrObservations(); ++i) { + STORM_PRINT(i << " |" << obsAccumulator[i] << std::endl) + if (obsAccumulator[i] == maxAvgDifference) { + observationResolutionVector[i] *= 2; + } + } + underApproxModelSize += 10; + ++refinementCounter; + } + + return std::make_unique>(POMDPCheckResult{res->overApproxValue, res->underApproxValue}); } template @@ -417,21 +474,22 @@ namespace storm { STORM_PRINT("Time Overapproximation: " << overApproxTimer << std::endl) //auto underApprox = weightedSumUnderMap[initialBelief.id]; - auto underApprox = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, maxUaModelSize); + auto underApproxComponents = computeUnderapproximation(pomdp, beliefList, beliefIsTarget, targetObservations, initialBelief.id, min, computeRewards, + maxUaModelSize); STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); - STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl); - - // Transfer the underapproximation results from the belief id space to the MDP id space - std::map underApproxResultMap = {{0, storm::utility::zero()}, - {1, storm::utility::one()}}; - for (auto const &belief : beliefGrid) { - underApproxResultMap[beliefStateMap.left.at(belief.id)] = weightedSumUnderMap[belief.id]; - } + STORM_PRINT("Under-Approximation Result: " << underApproxComponents->underApproxValue << std::endl); return std::make_unique>( - RefinementComponents{modelPtr, overApprox, underApprox, overApproxResultMap, underApproxResultMap, beliefList, beliefIsTarget, beliefStateMap}); + RefinementComponents{modelPtr, overApprox, underApproxComponents->underApproxValue, overApproxResultMap, + underApproxComponents->underApproxMap, beliefList, beliefIsTarget, beliefStateMap, + underApproxComponents->underApproxBeliefStateMap}); } +/* + template + std::unique_ptr> + ApproximatePOMDPModelchecker::computeRefinementStep(){}*/ + template ValueType ApproximatePOMDPModelchecker::overApproximationValueIteration(storm::models::sparse::Pomdp const &pomdp, @@ -499,8 +557,8 @@ namespace storm { } // Update the selected actions if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || - (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || - cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { + (!min && cc.isLess(storm::utility::zero(), currentValue - chosenValue)) || + cc.isEqual(storm::utility::zero(), chosenValue - currentValue)) { chosenValue = currentValue; if (!(useMdp && cc.isEqual(storm::utility::zero(), chosenValue - currentValue))) { chosenActionIndices.clear(); @@ -697,7 +755,7 @@ namespace storm { } template - ValueType + std::unique_ptr> ApproximatePOMDPModelchecker::computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, std::vector> &beliefList, std::vector &beliefIsTarget, @@ -706,13 +764,13 @@ namespace storm { bool computeRewards, uint64_t maxModelSize) { std::set visitedBelieves; std::deque believesToBeExpanded; - std::map beliefStateMap; + bsmap_type beliefStateMap; std::vector>> transitions = {{{{0, storm::utility::one()}}}, {{{1, storm::utility::one()}}}}; std::vector targetStates = {1}; uint64_t stateId = 2; - beliefStateMap[initialBeliefId] = stateId; + beliefStateMap.insert(bsmap_type::value_type(initialBeliefId, stateId)); ++stateId; uint64_t nextId = beliefList.size(); uint64_t counter = 0; @@ -727,8 +785,8 @@ namespace storm { // for targets, we only consider one action with one transition if (beliefIsTarget[currentBeliefId]) { // add a self-loop to target states - targetStates.push_back(beliefStateMap[currentBeliefId]); - transitions.push_back({{{beliefStateMap[currentBeliefId], storm::utility::one()}}}); + targetStates.push_back(beliefStateMap.left.at(currentBeliefId)); + transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one()}}}); } else if (counter > maxModelSize) { transitions.push_back({{{0, storm::utility::one()}}}); } else { @@ -745,12 +803,12 @@ namespace storm { observation, nextId); nextId = beliefList.size(); if (visitedBelieves.insert(nextBeliefId).second) { - beliefStateMap[nextBeliefId] = stateId; + beliefStateMap.insert(bsmap_type::value_type(nextBeliefId, stateId)); ++stateId; believesToBeExpanded.push_back(nextBeliefId); ++counter; } - transitionsInStateWithAction[beliefStateMap[nextBeliefId]] = iter->second; + transitionsInStateWithAction[beliefStateMap.left.at(nextBeliefId)] = iter->second; } actionTransitionStorage.push_back(transitionsInStateWithAction); } @@ -776,7 +834,7 @@ namespace storm { storm::models::sparse::Mdp underApproxMdp(modelComponents); if (computeRewards) { storm::models::sparse::StandardRewardModel rewardModel(boost::none, std::vector(modelComponents.transitionMatrix.getRowCount())); - for (auto const &iter : beliefStateMap) { + for (auto const &iter : beliefStateMap.left) { auto currentBelief = beliefList[iter.first]; auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front(); for (uint64_t action = 0; action < underApproxMdp.getNumberOfChoices(iter.second); ++action) { @@ -808,9 +866,9 @@ namespace storm { STORM_LOG_ASSERT(res, "Result does not exist."); res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(storm::storage::BitVector(underApproxMdp.getNumberOfStates(), true))); auto underApproxResultMap = res->asExplicitQuantitativeCheckResult().getValueMap(); - auto underApprox = underApproxResultMap[beliefStateMap[initialBeliefId]]; + auto underApprox = underApproxResultMap[beliefStateMap.left.at(initialBeliefId)]; - return underApprox; + return std::make_unique>(UnderApproxComponents{underApprox, underApproxResultMap, beliefStateMap}); } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 867f9d673..5e8a95025 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -11,7 +11,6 @@ namespace storm { namespace pomdp { namespace modelchecker { typedef boost::bimap bsmap_type; - typedef boost::bimap, uint64_t> uamap_type; template struct POMDPCheckResult { @@ -28,11 +27,19 @@ namespace storm { std::shared_ptr> overApproxModelPtr; ValueType overApproxValue; ValueType underApproxValue; - std::map &overApproxMap; - std::map &underApproxMap; - std::vector> &beliefList; - std::vector &beliefIsTarget; - bsmap_type &beliefStateMap; + std::map overApproxMap; + std::map underApproxMap; + std::vector> beliefList; + std::vector beliefIsTarget; + bsmap_type overApproxBeliefStateMap; + bsmap_type underApproxBeliefStateMap; + }; + + template> + struct UnderApproxComponents { + ValueType underApproxValue; + std::map underApproxMap; + bsmap_type underApproxBeliefStateMap; }; template> @@ -170,11 +177,12 @@ namespace storm { * @param maxModelSize * @return */ - ValueType computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, - std::vector> &beliefList, - std::vector &beliefIsTarget, - std::set const &targetObservations, - uint64_t initialBeliefId, bool min, bool computeReward, uint64_t maxModelSize); + std::unique_ptr> computeUnderapproximation(storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, + std::vector &beliefIsTarget, + std::set const &targetObservations, + uint64_t initialBeliefId, bool min, bool computeReward, + uint64_t maxModelSize); /** *