diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 0688e1cf2..e44102656 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -20,17 +20,26 @@ namespace storm { bool finished = false; uint64_t iteration = 0; + std::vector> beliefList; + uint64_t nextId = 0; + // Initial belief always has ID 0 + storm::pomdp::Belief initialBelief = getInitialBelief(pomdp, nextId); + ++nextId; + beliefList.push_back(initialBelief); - std::vector> beliefGrid; + std::vector> beliefGrid; std::vector beliefIsKnown; - constructBeliefGrid(pomdp, target_observations, gridResolution, beliefGrid, beliefIsKnown); + constructBeliefGrid(pomdp, target_observations, gridResolution, beliefList, beliefGrid, beliefIsKnown, + nextId); + nextId = beliefList.size(); + std::map result; std::map result_backup; + std::map chosenActions; std::vector>> observationProbabilities; - std::vector>>> nextBelieves; + std::vector>> nextBelieves; - uint64_t nextId = beliefGrid.size(); for (size_t i = 0; i < beliefGrid.size(); ++i) { auto currentBelief = beliefGrid[i]; bool isTarget = beliefIsKnown[i]; @@ -42,23 +51,24 @@ namespace storm { result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); std::vector> observationProbabilitiesInAction; - std::vector>> nextBelievesInAction; + std::vector> nextBelievesInAction; uint64_t numChoices = pomdp.getNumberOfChoices( pomdp.getStatesWithObservation(currentBelief.observation).front()); for (uint64_t action = 0; action < numChoices; ++action) { std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( pomdp, currentBelief, action); - std::map> actionObservationBelieves; + std::map actionObservationBelieves; for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) { uint32_t observation = iter->first; actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, + beliefList, currentBelief, action, observation, nextId); - ++nextId; + nextId = beliefList.size(); } observationProbabilitiesInAction.push_back(actionObservationProbabilities); nextBelievesInAction.push_back(actionObservationBelieves); @@ -67,24 +77,18 @@ namespace storm { nextBelieves.push_back(nextBelievesInAction); } } - - STORM_LOG_DEBUG("End of Section 1"); // Value Iteration auto cc = storm::utility::ConstantsComparator(); while (!finished && iteration < maxIterations) { - STORM_PRINT("Iteration " << std::to_string(iteration) << std::endl); + STORM_LOG_DEBUG("Iteration " << iteration + 1); bool improvement = false; for (size_t i = 0; i < beliefGrid.size(); ++i) { bool isTarget = beliefIsKnown[i]; if (!isTarget) { - Belief currentBelief = beliefGrid[i]; - STORM_LOG_DEBUG( - "Check Belief " << currentBelief.id << ": ||" << currentBelief.observation << "|" - << currentBelief.probabilities << "||"); + storm::pomdp::Belief currentBelief = beliefGrid[i]; // we can take any state with the observation as they have the same number of choices uint64_t numChoices = pomdp.getNumberOfChoices( pomdp.getStatesWithObservation(currentBelief.observation).front()); - STORM_LOG_DEBUG("Number choices: " << std::to_string(numChoices)); // Initialize the values for the value iteration ValueType chosenValue = min ? storm::utility::infinity() : -storm::utility::infinity(); @@ -96,8 +100,7 @@ namespace storm { for (auto iter = observationProbabilities[i][action].begin(); iter != observationProbabilities[i][action].end(); ++iter) { uint32_t observation = iter->first; - Belief nextBelief = nextBelieves[i][action][observation]; - + storm::pomdp::Belief nextBelief = beliefList[nextBelieves[i][action][observation]]; // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief std::pair>, std::vector> temp = computeSubSimplexAndLambdas( nextBelief.probabilities, gridResolution); @@ -108,7 +111,7 @@ namespace storm { for (size_t j = 0; j < lambdas.size(); ++j) { if (lambdas[j] != storm::utility::zero()) { sum += lambdas[j] * result_backup.at( - getBeliefIdInGrid(beliefGrid, observation, subSimplex[j])); + getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); } } currentValue += iter->second * sum; @@ -124,22 +127,12 @@ namespace storm { // TODO tie breaker? } result[currentBelief.id] = chosenValue; + chosenActions[currentBelief.id] = chosenActionIndex; // Check if the iteration brought an improvement if ((min && cc.isLess(storm::utility::zero(), result_backup[currentBelief.id] - result[currentBelief.id])) || (!min && cc.isLess(storm::utility::zero(), result[currentBelief.id] - result_backup[currentBelief.id]))) { - if (min) { - STORM_PRINT("Old: " << result_backup[currentBelief.id] << ", New: " - << result[currentBelief.id] << std::endl << "Delta: " - << result_backup[currentBelief.id] - result[currentBelief.id] - << std::endl); - } else { - STORM_PRINT("Old: " << result_backup[currentBelief.id] << ", New: " - << result[currentBelief.id] << std::endl << "Delta: " - << result_backup[currentBelief.id] - result[currentBelief.id] - << std::endl); - } improvement = true; } } @@ -152,32 +145,120 @@ namespace storm { ++iteration; } - // maybe change this so the initial Belief always has ID 0 - Belief initialBelief = getInitialBelief(pomdp, nextId); - ++nextId; + STORM_PRINT("Grid approximation took " << iteration << " iterations" << std::endl); + + beliefGrid.push_back(initialBelief); + beliefIsKnown.push_back( + target_observations.find(initialBelief.observation) != target_observations.end()); std::pair>, std::vector> temp = computeSubSimplexAndLambdas( initialBelief.probabilities, gridResolution); - std::vector> subSimplex = temp.first; - std::vector lambdas = temp.second; - - ValueType overApprox = storm::utility::zero(); - for (size_t j = 0; j < lambdas.size(); ++j) { - if (lambdas[j] != storm::utility::zero()) { - overApprox += lambdas[j] * - result_backup[getBeliefIdInGrid(beliefGrid, initialBelief.observation, - subSimplex[j])]; + std::vector> initSubSimplex = temp.first; + std::vector initLambdas = temp.second; + + auto overApprox = storm::utility::zero(); + for (size_t j = 0; j < initLambdas.size(); ++j) { + if (initLambdas[j] != storm::utility::zero()) { + overApprox += initLambdas[j] * + result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, + initSubSimplex[j])]; } } + // Now onto the under-approximation + + + /* All this has to be put into a separate function as we have to repeat it for other believes not in the grid + // first compute some things for the initial belief + std::vector> observationProbabilitiesInAction; + std::vector> nextBelievesInAction; + + uint64_t initialNumChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(initialBelief.observation).front()); + for (uint64_t action = 0; action < initialNumChoices; ++action) { + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( + pomdp, initialBelief, action); + std::map actionObservationBelieves; + for (auto iter = actionObservationProbabilities.begin(); + iter != actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, + beliefList, + initialBelief, + action, + observation, + nextId); + } + observationProbabilitiesInAction.push_back(actionObservationProbabilities); + nextBelievesInAction.push_back(actionObservationBelieves); + } + observationProbabilities.push_back(observationProbabilitiesInAction); + nextBelieves.push_back(nextBelievesInAction); + + // do one step here to get the best action in the initial state + ValueType chosenValue = min ? storm::utility::infinity() + : -storm::utility::infinity(); + uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType currentValue; + + for (uint64_t action = 0; action < initialNumChoices; ++action) { + currentValue = storm::utility::zero(); // simply change this for rewards? + for (auto iter = observationProbabilities[initialBelief.id][action].begin(); + iter != observationProbabilities[initialBelief.id][action].end(); ++iter) { + uint32_t observation = iter->first; + storm::pomdp::Belief nextBelief = beliefList[nextBelieves[initialBelief.id][action][observation]]; + + // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief + temp = computeSubSimplexAndLambdas( + nextBelief.probabilities, gridResolution); + std::vector> subSimplex = temp.first; + std::vector lambdas = temp.second; + + ValueType sum = storm::utility::zero(); + for (size_t j = 0; j < lambdas.size(); ++j) { + if (lambdas[j] != storm::utility::zero()) { + sum += lambdas[j] * result.at( + getBeliefIdInVector(beliefGrid, observation, subSimplex[j])); + } + } + currentValue += iter->second * sum; + } + + // Update the selected actions + if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || + (!min && + cc.isLess(storm::utility::zero(), currentValue - chosenValue))) { + chosenValue = currentValue; + chosenActionIndex = action; + } + } + chosenActions[initialBelief.id] = chosenActionIndex;*/ + + std::set exploredBelieves; + std::deque believesToBeExplored; + + exploredBelieves.insert(initialBelief.id); + believesToBeExplored.push_back(initialBelief.id); + while (!believesToBeExplored.empty()) { + auto currentBeliefId = believesToBeExplored.front(); + if (chosenActions.find(currentBeliefId) != chosenActions.end()) { + + } else { + + } + believesToBeExplored.pop_front(); + } + + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); } template - uint64_t ApproximatePOMDPModelchecker::getBeliefIdInGrid( - std::vector> &grid, uint32_t observation, std::vector probabilities) { + uint64_t ApproximatePOMDPModelchecker::getBeliefIdInVector( + std::vector> &grid, uint32_t observation, + std::vector probabilities) { for (auto const &belief : grid) { - if (belief.observation == observation && probabilities.size() == belief.probabilities.size()) { + if (belief.observation == observation) { if (belief.probabilities == probabilities) { STORM_LOG_DEBUG("Found belief with id " << std::to_string(belief.id)); return belief.id; @@ -189,7 +270,7 @@ namespace storm { } template - Belief ApproximatePOMDPModelchecker::getInitialBelief( + storm::pomdp::Belief ApproximatePOMDPModelchecker::getInitialBelief( storm::models::sparse::Pomdp const &pomdp, uint64_t id) { STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, "POMDP contains more than one initial state"); @@ -203,16 +284,18 @@ namespace storm { observation = pomdp.getObservation(state); } } - return Belief{id, observation, distribution}; + return storm::pomdp::Belief{id, observation, distribution}; } template void ApproximatePOMDPModelchecker::constructBeliefGrid( storm::models::sparse::Pomdp const &pomdp, std::set target_observations, uint64_t gridResolution, - std::vector> &grid, std::vector &beliefIsKnown) { + std::vector> &beliefList, + std::vector> &grid, std::vector &beliefIsKnown, + uint64_t nextId) { bool isTarget; - uint64_t newId = 0; + uint64_t newId = nextId; for (uint32_t observation = 0; observation < pomdp.getNrObservations(); ++observation) { std::vector statesWithObservation = pomdp.getStatesWithObservation(observation); @@ -224,10 +307,11 @@ namespace storm { std::vector distribution(pomdp.getNumberOfStates(), storm::utility::zero()); distribution[statesWithObservation.front()] = storm::utility::one(); - Belief belief = {newId, observation, distribution}; + storm::pomdp::Belief belief = {newId, observation, distribution}; STORM_LOG_TRACE( "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << distribution << "]"); + beliefList.push_back(belief); grid.push_back(belief); beliefIsKnown.push_back(isTarget); ++newId; @@ -251,10 +335,11 @@ namespace storm { helper[statesWithObservation.size() - 1] / storm::utility::convertNumber(gridResolution); - Belief belief = {newId, observation, distribution}; + storm::pomdp::Belief belief = {newId, observation, distribution}; STORM_LOG_TRACE( "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << distribution << "]"); + beliefList.push_back(belief); grid.push_back(belief); beliefIsKnown.push_back(isTarget); if (helper[statesWithObservation.size() - 1] == @@ -353,7 +438,8 @@ namespace storm { template std::map ApproximatePOMDPModelchecker::computeObservationProbabilitiesAfterAction( - storm::models::sparse::Pomdp const &pomdp, Belief belief, + storm::models::sparse::Pomdp const &pomdp, + storm::pomdp::Belief belief, uint64_t actionIndex) { std::map res; // the id is not important here as we immediately discard the belief (very hacky, I don't like it either) @@ -373,8 +459,10 @@ namespace storm { } template - Belief ApproximatePOMDPModelchecker::getBeliefAfterAction( - storm::models::sparse::Pomdp const &pomdp, Belief belief, + storm::pomdp::Belief + ApproximatePOMDPModelchecker::getBeliefAfterAction( + storm::models::sparse::Pomdp const &pomdp, + storm::pomdp::Belief belief, uint64_t actionIndex, uint64_t id) { std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); uint32_t observation = 0; @@ -388,22 +476,13 @@ namespace storm { } } } - /* Should not be necessary - // We have to normalize the distribution - auto sum = storm::utility::zero(); - for(ValueType const& entry : distributionAfter){ - sum += entry; - } - for(size_t i = 0; i < pomdp.getNumberOfStates(); ++i){ - distributionAfter[i] /= sum; - }*/ - return Belief{id, observation, distributionAfter}; + return storm::pomdp::Belief{id, observation, distributionAfter}; } template - Belief - ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( - storm::models::sparse::Pomdp const &pomdp, Belief belief, + uint64_t ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( + storm::models::sparse::Pomdp const &pomdp, + std::vector> &beliefList, storm::pomdp::Belief belief, uint64_t actionIndex, uint32_t observation, uint64_t id) { std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { @@ -425,7 +504,12 @@ namespace storm { for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) { distributionAfter[i] /= sum; } - return Belief{id, observation, distributionAfter}; + if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) { + return getBeliefIdInVector(beliefList, observation, distributionAfter); + } else { + beliefList.push_back(storm::pomdp::Belief{id, observation, distributionAfter}); + return id; + } } diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h index 4a4028288..e5636d5a9 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -2,21 +2,13 @@ #include "storm/modelchecker/CheckTask.h" #include "storm/models/sparse/Pomdp.h" #include "storm/utility/logging.h" +#include "storm-pomdp/storage/Belief.h" namespace storm { namespace pomdp { namespace modelchecker { class POMDPCheckResult; - // Structure used to represent a belief - template - struct Belief { - uint64_t id; - uint32_t observation; - //TODO make this sparse? - std::vector probabilities; - }; - template> class ApproximatePOMDPModelchecker { public: @@ -38,7 +30,7 @@ namespace storm { * @param id * @return */ - Belief + storm::pomdp::Belief getInitialBelief(storm::models::sparse::Pomdp const &pomdp, uint64_t id); @@ -61,7 +53,9 @@ namespace storm { */ void constructBeliefGrid(storm::models::sparse::Pomdp const &pomdp, std::set target_observations, uint64_t gridResolution, - std::vector> &grid, std::vector &beliefIsKnown); + std::vector> &beliefList, + std::vector> &grid, + std::vector &beliefIsKnown, uint64_t nextId); /** @@ -73,11 +67,13 @@ namespace storm { * @return */ std::map computeObservationProbabilitiesAfterAction( - storm::models::sparse::Pomdp const &pomdp, Belief belief, + storm::models::sparse::Pomdp const &pomdp, + storm::pomdp::Belief belief, uint64_t actionIndex); /** - * Helper method to get the next belief that results from a belief by performing an action and observing an observation + * Helper method to get the id of the next belief that results from a belief by performing an action and observing an observation. + * If the belief does not exist yet, it is created and added to the list of all beliefs * * @param pomdp the POMDP on which the evaluation should be performed * @param belief the starting belief @@ -85,9 +81,10 @@ namespace storm { * @param observation the observation after the action was performed * @return the resulting belief (observation and distribution) */ - Belief + uint64_t getBeliefAfterActionAndObservation(const models::sparse::Pomdp &pomdp, - Belief belief, + std::vector> &beliefList, + storm::pomdp::Belief belief, uint64_t actionIndex, uint32_t observation, uint64_t id); /** @@ -98,19 +95,19 @@ namespace storm { * @param actionIndex * @return */ - Belief + storm::pomdp::Belief getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, - Belief belief, uint64_t actionIndex, uint64_t id); + storm::pomdp::Belief belief, uint64_t actionIndex, uint64_t id); /** - * Helper to get the id of a Belief in the grid + * Helper to get the id of a Belief stored in a given vector structure * * @param observation * @param probabilities * @return */ - uint64_t getBeliefIdInGrid(std::vector> &grid, uint32_t observation, - std::vector probabilities); + uint64_t getBeliefIdInVector(std::vector> &grid, uint32_t observation, + std::vector probabilities); }; } diff --git a/src/storm-pomdp/storage/Belief.h b/src/storm-pomdp/storage/Belief.h new file mode 100644 index 000000000..bf4df4c9e --- /dev/null +++ b/src/storm-pomdp/storage/Belief.h @@ -0,0 +1,12 @@ +namespace storm { + namespace pomdp { + // Structure used to represent a belief + template + struct Belief { + uint64_t id; + uint32_t observation; + //TODO make this sparse? + std::vector probabilities; + }; + } +} \ No newline at end of file