diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 282f4dd68..37a29f9b1 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -52,14 +52,10 @@ namespace storm {
                     std::set<uint32_t> const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards) {
                 STORM_PRINT("Use On-The-Fly Grid Generation" << std::endl)
-
-                bool finished = false;
-                uint64_t iteration = 0;
                 std::vector<storm::pomdp::Belief<ValueType>> beliefList;
                 std::vector<bool> beliefIsTarget;
                 std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
                 std::map<uint64_t, ValueType> result;
-                std::map<uint64_t, ValueType> result_backup;
                 //Use caching to avoid multiple computation of the subsimplices and lambdas
                 std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
                 std::map<uint64_t, std::vector<ValueType>> lambdaCache;
@@ -147,7 +143,6 @@ namespace storm {
                     if (isTarget) {
                         // Depending on whether we compute rewards, we select the right initial result
                         result.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero<ValueType>() : storm::utility::one<ValueType>()));
-                        result_backup.emplace(std::make_pair(currId, computeRewards ? storm::utility::zero<ValueType>() : storm::utility::one<ValueType>()));
 
                         // MDP stuff
                         std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
@@ -158,7 +153,6 @@ namespace storm {
                         mdpTransitions.push_back(transitionsInBelief);
                     } else {
                         result.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
-                        result_backup.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
 
                         uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front();
                         uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
@@ -274,6 +268,61 @@ namespace storm {
                 overApproxMdp.printModelInformationToStream(std::cout);
 
                 storm::utility::Stopwatch overApproxTimer(true);
+                auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves,
+                                                                  beliefActionRewards, subSimplexCache, lambdaCache,
+                                                                  result, chosenActions, gridResolution, min, computeRewards);
+                overApproxTimer.stop();
+                auto underApprox = storm::utility::zero<ValueType>();
+                /*
+                storm::utility::Stopwatch underApproxTimer(true);
+                ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
+                                                                         result, chosenActions, gridResolution, initialBelief.id, min, computeRewards);
+                underApproxTimer.stop();
+
+                STORM_PRINT("Time Overapproximation: " << overApproxTimer
+                                                       << std::endl
+                                                       << "Time Underapproximation: " << underApproxTimer
+                                                       << std::endl);*/
+
+                STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
+                //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
+
+                auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
+                auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
+                std::vector<std::string> parameterNames;
+                storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames);
+
+                std::string propertyString = computeRewards ? "R" : "P";
+                propertyString += min ? "min" : "max";
+                propertyString += "=? [F \"target\"]";
+                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
+                std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
+
+                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, true)));
+                STORM_LOG_ASSERT(res, "Result does not exist.");
+                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates()));
+                STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap().begin()->second) << std::endl);
+
+                return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{overApprox, underApprox});
+            }
+
+            template<typename ValueType, typename RewardModelType>
+            ValueType
+            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::overApproximationValueIteration(storm::models::sparse::Pomdp<ValueType> const &pomdp,
+                                                          std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                                                          std::vector<storm::pomdp::Belief<ValueType>> &beliefGrid,
+                                                          std::vector<bool> &beliefIsTarget,
+                                                          std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+                                                          std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+                                                          std::map<uint64_t, std::vector<ValueType>> &beliefActionRewards,
+                                                          std::map<uint64_t, std::vector<std::vector<ValueType>>> &subSimplexCache,
+                                                          std::map<uint64_t, std::vector<ValueType>> &lambdaCache,
+                                                          std::map<uint64_t, ValueType> &result,
+                                                          std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
+                                                          uint64_t gridResolution, bool min, bool computeRewards) {
+                std::map<uint64_t, ValueType> result_backup = result;
+                uint64_t iteration = 0;
+                bool finished = false;
                 // Value Iteration
                 while (!finished && iteration < maxIterations) {
                     storm::utility::Stopwatch iterationTimer(true);
@@ -304,9 +353,8 @@ namespace storm {
                                     subSimplex = subSimplexCache[nextBelief.id];
                                     lambdas = lambdaCache[nextBelief.id];
                                 } else {
-                                    //TODO This should not ne reachable
-                                    std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
-                                            nextBelief.probabilities, gridResolution);
+                                    std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(nextBelief.probabilities,
+                                                                                                                                              gridResolution);
                                     subSimplex = temp.first;
                                     lambdas = temp.second;
                                     subSimplexCache[nextBelief.id] = subSimplex;
@@ -353,45 +401,14 @@ namespace storm {
 
                 STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl);
 
+                auto overApprox = storm::utility::zero<ValueType>();
-                for (size_t j = 0; j < initLambdas.size(); ++j) {
-                    if (initLambdas[j] != storm::utility::zero<ValueType>()) {
-                        overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])];
+                for (size_t j = 0; j < lambdaCache[0].size(); ++j) {
+                    if (lambdaCache[0][j] != storm::utility::zero<ValueType>()) {
+                        overApprox += lambdaCache[0][j] * result_backup[getBeliefIdInVector(beliefGrid, beliefList[0].observation, subSimplexCache[0][j])];
                     }
                 }
-                overApproxTimer.stop();
-                auto underApprox = storm::utility::zero<ValueType>();
-                /*
-                storm::utility::Stopwatch underApproxTimer(true);
-                ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
-                                                                         result, chosenActions, gridResolution, initialBelief.id, min, computeRewards);
-                underApproxTimer.stop();
-
-                STORM_PRINT("Time Overapproximation: " << overApproxTimer
-                                                       << std::endl
-                                                       << "Time Underapproximation: " << underApproxTimer
-                                                       << std::endl);*/
-
-                STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
-                //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
-
-                auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
-                auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
-                std::vector<std::string> parameterNames;
-                storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames);
-
-                std::string propertyString = computeRewards ? "R" : "P";
-                propertyString += min ? "min" : "max";
-                propertyString += "=? [F \"target\"]";
-                std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
-                std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
-
-                std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, true)));
-                STORM_LOG_ASSERT(res, "Result not exist.");
-                res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates()));
-                STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap().begin()->second) << std::endl);
-
-                return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{overApprox, underApprox});
+                return overApprox;
             }
 
             template<typename ValueType, typename RewardModelType>
@@ -416,8 +433,6 @@ namespace storm {
                     std::set<uint32_t> const &targetObservations, bool min, uint64_t gridResolution, bool computeRewards) {
                 storm::utility::Stopwatch beliefGridTimer(true);
-                bool finished = false;
-                uint64_t iteration = 0;
 
                 std::vector<storm::pomdp::Belief<ValueType>> beliefList;
                 std::vector<bool> beliefIsTarget;
@@ -436,7 +451,6 @@ namespace storm {
                 storm::utility::Stopwatch overApproxTimer(true);
                 // Belief ID -> Value
                 std::map<uint64_t, ValueType> result;
-                std::map<uint64_t, ValueType> result_backup;
                 // Belief ID -> ActionIndex
                 std::map<uint64_t, std::vector<uint64_t>> chosenActions;
 
@@ -446,6 +460,13 @@ namespace storm {
                 std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> nextBelieves;
                 // current ID -> action -> reward
                 std::map<uint64_t, std::vector<ValueType>> beliefActionRewards;
+                //Use caching to avoid multiple computation of the subsimplices and lambdas
+                std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
+                std::map<uint64_t, std::vector<ValueType>> lambdaCache;
+
+                std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution);
+                subSimplexCache[0] = temp.first;
+                lambdaCache[0] = temp.second;
 
                 storm::utility::Stopwatch nextBeliefGeneration(true);
                 for (size_t i = 0; i < beliefGrid.size(); ++i) {
@@ -453,10 +474,8 @@ namespace storm {
                     bool isTarget = beliefIsTarget[currentBelief.id];
                     if (isTarget) {
                         result.emplace(std::make_pair(currentBelief.id, computeRewards ? storm::utility::zero<ValueType>() : storm::utility::one<ValueType>()));
-                        result_backup.emplace(std::make_pair(currentBelief.id, computeRewards ? storm::utility::zero<ValueType>() : storm::utility::one<ValueType>()));
                     } else {
                         result.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
-                        result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero<ValueType>()));
                         //TODO put this in extra function
                         // As we need to grab some parameters which are the same for all states with the same observation, we simply select some state as the representative
                         uint64_t representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
@@ -494,100 +513,11 @@ namespace storm {
                 }
                 nextBeliefGeneration.stop();
 
-                //Use chaching to avoid multiple computation of the subsimplices and lambdas
-                std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
-                std::map<uint64_t, std::vector<ValueType>> lambdaCache;
-
                 STORM_PRINT("Time generation of next believes: " << nextBeliefGeneration << std::endl)
                 // Value Iteration
-                while (!finished && iteration < maxIterations) {
-                    storm::utility::Stopwatch iterationTimer(true);
-                    STORM_LOG_DEBUG("Iteration " << iteration + 1);
-                    bool improvement = false;
-                    for (size_t i = 0; i < beliefGrid.size(); ++i) {
-                        storm::pomdp::Belief<ValueType> currentBelief = beliefGrid[i];
-                        bool isTarget = beliefIsTarget[currentBelief.id];
-                        if (!isTarget) {
-                            // we can take any state with the observation as they have the same number of choices
-                            uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(currentBelief.observation).front());
-                            // Initialize the values for the value iteration
-                            ValueType chosenValue = min ? storm::utility::infinity<ValueType>() : -storm::utility::infinity<ValueType>();
-                            std::vector<uint64_t> chosenActionIndices;
-                            ValueType currentValue;
-
-                            for (uint64_t action = 0; action < numChoices; ++action) {
-                                currentValue = computeRewards ? beliefActionRewards[currentBelief.id][action] : storm::utility::zero<ValueType>();
-                                for (auto iter = observationProbabilities[currentBelief.id][action].begin();
-                                     iter != observationProbabilities[currentBelief.id][action].end(); ++iter) {
-                                    uint32_t observation = iter->first;
-                                    storm::pomdp::Belief<ValueType> nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]];
-                                    // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
-                                    // cache the values to not always re-calculate
-                                    std::vector<std::vector<ValueType>> subSimplex;
-                                    std::vector<ValueType> lambdas;
-                                    if (subSimplexCache.count(nextBelief.id) > 0) {
-                                        subSimplex = subSimplexCache[nextBelief.id];
-                                        lambdas = lambdaCache[nextBelief.id];
-                                    } else {
-                                        std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(nextBelief.probabilities,
-                                                                                                                                                  gridResolution);
-                                        subSimplex = temp.first;
-                                        lambdas = temp.second;
-                                        subSimplexCache[nextBelief.id] = subSimplex;
-                                        lambdaCache[nextBelief.id] = lambdas;
-                                    }
-                                    auto sum = storm::utility::zero<ValueType>();
-                                    for (size_t j = 0; j < lambdas.size(); ++j) {
-                                        if (!cc.isEqual(lambdas[j], storm::utility::zero<ValueType>())) {
-                                            sum += lambdas[j] * result_backup.at(getBeliefIdInVector(beliefGrid, observation, subSimplex[j]));
-                                        }
-                                    }
-
-                                    currentValue += iter->second * sum;
-                                }
-                                // Update the selected actions
-                                if ((min && cc.isLess(storm::utility::zero<ValueType>(), chosenValue - currentValue)) ||
-                                    (!min && cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue)) ||
-                                    cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue)) {
-                                    chosenValue = currentValue;
-                                    if (!(useMdp && cc.isEqual(storm::utility::zero<ValueType>(), chosenValue - currentValue))) {
-                                        chosenActionIndices.clear();
-                                    }
-                                    chosenActionIndices.push_back(action);
-                                }
-                            }
-                            result[currentBelief.id] = chosenValue;
-                            chosenActions[currentBelief.id] = chosenActionIndices;
-                            // Check if the iteration brought an improvement
-                            if (!cc.isEqual(storm::utility::zero<ValueType>(), result_backup[currentBelief.id] - result[currentBelief.id])) {
-                                improvement = true;
-                            }
-                        }
-                    }
-                    finished = !improvement;
-                    // back up
-                    result_backup = result;
-
-                    ++iteration;
-                    iterationTimer.stop();
-                    STORM_PRINT("Iteration " << iteration << ": " << iterationTimer << std::endl);
-                }
-
-                STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl);
-
-                beliefGrid.push_back(initialBelief);
-                beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
-
-                std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution);
-                std::vector<std::vector<ValueType>> initSubSimplex = temp.first;
-                std::vector<ValueType> initLambdas = temp.second;
-
-                auto overApprox = storm::utility::zero<ValueType>();
-                for (size_t j = 0; j < initLambdas.size(); ++j) {
-                    if (initLambdas[j] != storm::utility::zero<ValueType>()) {
-                        overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])];
-                    }
-                }
+                auto overApprox = overApproximationValueIteration(pomdp, beliefList, beliefGrid, beliefIsTarget, observationProbabilities, nextBelieves,
+                                                                  beliefActionRewards, subSimplexCache, lambdaCache,
+                                                                  result, chosenActions, gridResolution, min, computeRewards);
                 overApproxTimer.stop();
 
                 // Now onto the under-approximation
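Note on the interpolation step that the extracted helper performs, both in its backup and in the final read-off of lambdaCache[0] / subSimplexCache[0]: the grid value function is extended to an arbitrary belief by Lovejoy-style convex interpolation over the vertices of its enclosing sub-simplex, V(b) ~= sum_j lambda_j * V(v_j) with lambda_j >= 0 and sum_j lambda_j = 1. The sketch below is a minimal, self-contained illustration of that step only; all names are hypothetical stand-ins, not Storm's API (gridIndex plays the role of getBeliefIdInVector, resultBackup the role of result_backup).

    // Minimal sketch of Lovejoy-style interpolation, assuming beliefs are
    // probability vectors and grid vertices have been assigned ids.
    #include <cstddef>
    #include <iostream>
    #include <map>
    #include <vector>

    using Value = double;
    using BeliefId = std::size_t;

    // Value of an off-grid belief as the convex combination of the backed-up
    // values of the vertices of its enclosing sub-simplex.
    Value interpolate(std::vector<std::vector<Value>> const& subSimplex,
                      std::vector<Value> const& lambdas,
                      std::map<std::vector<Value>, BeliefId> const& gridIndex,
                      std::map<BeliefId, Value> const& resultBackup) {
        Value sum = 0.0;
        for (std::size_t j = 0; j < lambdas.size(); ++j) {
            if (lambdas[j] != 0.0) {  // vertices with zero weight contribute nothing
                sum += lambdas[j] * resultBackup.at(gridIndex.at(subSimplex[j]));
            }
        }
        return sum;
    }

    int main() {
        // Toy 2-state example: belief (0.25, 0.75) = 0.5*(0,1) + 0.5*(0.5,0.5).
        std::map<std::vector<Value>, BeliefId> gridIndex{{{0.0, 1.0}, 0}, {{0.5, 0.5}, 1}};
        std::map<BeliefId, Value> resultBackup{{0, 1.0}, {1, 0.4}};
        std::cout << interpolate({{0.0, 1.0}, {0.5, 0.5}}, {0.5, 0.5}, gridIndex, resultBackup)
                  << std::endl;  // 0.5*1.0 + 0.5*0.4 = 0.7
    }

Reading lambdaCache[0] and subSimplexCache[0] for the final result is sound only if key 0 holds the initial belief's decomposition at both call sites: the on-the-fly variant appears to fill the caches when the initial belief (id 0) is constructed, and the fixed-grid variant now seeds them explicitly before calling the helper.
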
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index f20f4ebc1..6d8b50d43 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -240,6 +240,17 @@ namespace storm {
             ValueType getRewardAfterAction(storm::models::sparse::Pomdp<ValueType> const &pomdp, uint64_t action, storm::pomdp::Belief<ValueType> belief);
 
+            ValueType
+            overApproximationValueIteration(storm::models::sparse::Pomdp<ValueType> const &pomdp, std::vector<storm::pomdp::Belief<ValueType>> &beliefList,
+                                            std::vector<storm::pomdp::Belief<ValueType>> &beliefGrid, std::vector<bool> &beliefIsTarget,
+                                            std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
+                                            std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
+                                            std::map<uint64_t, std::vector<ValueType>> &beliefActionRewards,
+                                            std::map<uint64_t, std::vector<std::vector<ValueType>>> &subSimplexCache,
+                                            std::map<uint64_t, std::vector<ValueType>> &lambdaCache, std::map<uint64_t, ValueType> &result,
+                                            std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
+                                            uint64_t gridResolution, bool min, bool computeRewards);
+
             storm::utility::ConstantsComparator<ValueType> cc;
             double precision;
             bool useMdp;
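The other pattern the patch leans on is the lookup-or-compute caching of the sub-simplex/lambda decompositions inside the value-iteration loop: the decomposition of a belief is deterministic, so it is computed at most once per belief id and reused across sweeps. Below is a compact sketch of that pattern under simplifying assumptions; names are illustrative only (decompose stands in for computeSubSimplexAndLambdas, and the two separate maps of the patch are folded into one map of pairs).

    #include <cstdint>
    #include <map>
    #include <utility>
    #include <vector>

    using Value = double;
    using SubSimplex = std::vector<std::vector<Value>>;
    using Lambdas = std::vector<Value>;

    // Placeholder for the geometric decomposition; a real implementation
    // follows Lovejoy's grid construction for the given resolution.
    std::pair<SubSimplex, Lambdas> decompose(std::vector<Value> const& belief, uint64_t /*resolution*/) {
        return {{belief}, {1.0}};  // trivial stand-in: treat the belief as a vertex
    }

    std::pair<SubSimplex, Lambdas> const&
    cachedDecomposition(uint64_t beliefId, std::vector<Value> const& belief, uint64_t resolution,
                        std::map<uint64_t, std::pair<SubSimplex, Lambdas>>& cache) {
        auto it = cache.find(beliefId);
        if (it == cache.end()) {
            // First request for this belief id: compute once and memoize.
            it = cache.emplace(beliefId, decompose(belief, resolution)).first;
        }
        return it->second;
    }

    int main() {
        std::map<uint64_t, std::pair<SubSimplex, Lambdas>> cache;
        // Seeding key 0 with the initial belief's decomposition mirrors what
        // the fixed-grid caller now does before invoking the helper.
        cache.emplace(0, decompose({0.5, 0.5}, 4));
        cachedDecomposition(0, {0.5, 0.5}, 4, cache);  // cache hit, no recomputation
        return 0;
    }

A side effect of moving the caches before the grid loop in the fixed-grid variant is that the helper can treat cache membership as an invariant for the initial belief while still populating entries lazily for all successor beliefs it encounters.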