From 7f9ad39d3499826bd610b5efe9a6c10e89d61c5a Mon Sep 17 00:00:00 2001 From: Alexander Bork Date: Wed, 11 Sep 2019 13:18:59 +0200 Subject: [PATCH] First version for the over-approximation of POMDP reachability --- src/storm-pomdp-cli/storm-pomdp.cpp | 18 +- .../ApproximatePOMDPModelchecker.cpp | 419 ++++++++++++++++++ .../ApproximatePOMDPModelchecker.h | 118 +++++ src/storm/models/sparse/Pomdp.cpp | 13 +- src/storm/models/sparse/Pomdp.h | 2 +- 5 files changed, 566 insertions(+), 4 deletions(-) create mode 100644 src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp create mode 100644 src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index ec888bd02..730ce3ec8 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -38,6 +38,7 @@ #include "storm-pomdp/transformer/BinaryPomdpTransformer.h" #include "storm-pomdp/analysis/UniqueObservationStates.h" #include "storm-pomdp/analysis/QualitativeAnalysis.h" +#include "storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h" #include "storm/api/storm.h" /*! @@ -89,6 +90,18 @@ int main(const int argc, const char** argv) { auto const& coreSettings = storm::settings::getModule(); auto const& pomdpSettings = storm::settings::getModule(); + auto const &general = storm::settings::getModule(); + auto const &debug = storm::settings::getModule(); + + if (general.isVerboseSet()) { + storm::utility::setLogLevel(l3pp::LogLevel::INFO); + } + if (debug.isDebugSet()) { + storm::utility::setLogLevel(l3pp::LogLevel::DEBUG); + } + if (debug.isTraceSet()) { + storm::utility::setLogLevel(l3pp::LogLevel::TRACE); + } // For several engines, no model building step is performed, but the verification is started right away. storm::settings::modules::CoreSettings::Engine engine = coreSettings.getEngine(); @@ -98,7 +111,10 @@ int main(const int argc, const char** argv) { auto model = storm::cli::buildPreprocessExportModelWithValueTypeAndDdlib(symbolicInput, engine); STORM_LOG_THROW(model && model->getType() == storm::models::ModelType::Pomdp, storm::exceptions::WrongFormatException, "Expected a POMDP."); std::shared_ptr> pomdp = model->template as>(); - + + // For ease of testing + storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); + std::shared_ptr formula; if (!symbolicInput.properties.empty()) { formula = symbolicInput.properties.front().getRawFormula(); diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp new file mode 100644 index 000000000..52694180f --- /dev/null +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -0,0 +1,419 @@ +#include +#include "ApproximatePOMDPModelchecker.h" +#include "storm/utility/vector.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + template + ApproximatePOMDPModelchecker::ApproximatePOMDPModelchecker() { + //Intentionally left empty + } + + template + /*std::unique_ptr*/ void + ApproximatePOMDPModelchecker::computeReachabilityProbability( + storm::models::sparse::Pomdp const &pomdp, + std::set target_observations, bool min, uint64_t gridResolution) { + //TODO add timing + uint64_t maxIterations = 100; + bool finished = false; + uint64_t iteration = 0; + + + std::vector> beliefGrid; + std::vector beliefIsKnown; + constructBeliefGrid(pomdp, target_observations, gridResolution, beliefGrid, beliefIsKnown); + std::map result; + std::map result_backup; + + std::vector>> observationProbabilities; + std::vector>>> nextBelieves; + + uint64_t nextId = beliefGrid.size(); + for (size_t i = 0; i < beliefGrid.size(); ++i) { + auto currentBelief = beliefGrid[i]; + bool isTarget = beliefIsKnown[i]; + if (isTarget) { + result.emplace(std::make_pair(currentBelief.id, storm::utility::one())); + result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::one())); + } else { + result.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); + result_backup.emplace(std::make_pair(currentBelief.id, storm::utility::zero())); + + std::vector> observationProbabilitiesInAction; + std::vector>> nextBelievesInAction; + + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(currentBelief.observation).front()); + for (uint64_t action = 0; action < numChoices; ++action) { + std::map actionObservationProbabilities = computeObservationProbabilitiesAfterAction( + pomdp, currentBelief, action); + std::map> actionObservationBelieves; + for (auto iter = actionObservationProbabilities.begin(); + iter != actionObservationProbabilities.end(); ++iter) { + uint32_t observation = iter->first; + actionObservationBelieves[observation] = getBeliefAfterActionAndObservation(pomdp, + currentBelief, + action, + observation, + nextId); + ++nextId; + } + observationProbabilitiesInAction.push_back(actionObservationProbabilities); + nextBelievesInAction.push_back(actionObservationBelieves); + } + observationProbabilities.push_back(observationProbabilitiesInAction); + nextBelieves.push_back(nextBelievesInAction); + } + } + + // Value Iteration + while (!finished && iteration < maxIterations) { + STORM_LOG_DEBUG("Iteration " << std::to_string(iteration)); + bool improvement = false; + for (size_t i = 0; i < beliefGrid.size(); ++i) { + bool isTarget = beliefIsKnown[i]; + if (!isTarget) { + Belief currentBelief = beliefGrid[i]; + // we can take any state with the observation as they have the same number of choices + uint64_t numChoices = pomdp.getNumberOfChoices( + pomdp.getStatesWithObservation(currentBelief.observation).front()); + + // Initialize the values for the value iteration + ValueType bestValue = min ? storm::utility::infinity() + : -storm::utility::infinity(); + uint64_t chosenActionIndex = std::numeric_limits::infinity(); + ValueType currentValue; + + for (uint64_t action = 0; action < numChoices; ++action) { + currentValue = storm::utility::zero(); // simply change this for rewards? + + for (auto iter = observationProbabilities[i][action].begin(); + iter != observationProbabilities[i][action].end(); ++iter) { + uint32_t observation = iter->first; + Belief nextBelief = nextBelieves[i][action][observation]; + + // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + currentBelief.probabilities, gridResolution); + std::vector> subSimplex = temp.first; + std::vector lambdas = temp.second; + + ValueType sum = storm::utility::zero(); + for (size_t j = 0; j < lambdas.size(); ++j) { + if (lambdas[j] != storm::utility::zero()) { + sum += lambdas[j] * result_backup.at( + getBeliefIdInGrid(beliefGrid, observation, subSimplex[j])); + } + } + + currentValue += iter->second * sum; + } + + // Update the selected actions + auto cc = storm::utility::ConstantsComparator(); + if ((min && cc.isLess(storm::utility::zero(), bestValue - currentValue)) || + (!min && cc.isLess(storm::utility::zero(), currentValue - bestValue))) { + improvement = true; + bestValue = currentValue; + chosenActionIndex = action; + } + // TODO tie breaker? + } + result[currentBelief.id] = bestValue; + } + } + finished = !improvement; + // back up + for (auto iter = result.begin(); iter != result.end(); ++iter) { + result_backup[iter->first] = result[iter->first]; + } + ++iteration; + } + + // maybe change this so the initial Belief always has ID 0 + Belief initialBelief = getInitialBelief(pomdp, nextId); + ++nextId; + + std::pair>, std::vector> temp = computeSubSimplexAndLambdas( + initialBelief.probabilities, gridResolution); + std::vector> subSimplex = temp.first; + std::vector lambdas = temp.second; + + ValueType overApprox = storm::utility::zero(); + for (size_t j = 0; j < lambdas.size(); ++j) { + if (lambdas[j] != storm::utility::zero()) { + overApprox += lambdas[j] * + result_backup[getBeliefIdInGrid(beliefGrid, initialBelief.observation, + subSimplex[j])]; + } + } + + STORM_LOG_DEBUG("Over-Approximation Result: " << overApprox); + } + + template + uint64_t ApproximatePOMDPModelchecker::getBeliefIdInGrid( + std::vector> &grid, uint32_t observation, std::vector probabilities) { + for (auto const &belief : grid) { + if (belief.observation == observation && probabilities.size() == belief.probabilities.size()) { + if (belief.probabilities == probabilities) { + STORM_LOG_DEBUG("Found belief with id " << std::to_string(belief.id)); + return belief.id; + } + } + } + STORM_LOG_DEBUG("Did not find the belief in the grid"); + return -1; + } + + template + Belief ApproximatePOMDPModelchecker::getInitialBelief( + storm::models::sparse::Pomdp const &pomdp, uint64_t id) { + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() > 1, + "POMDP contains more than one initial state"); + std::vector distribution(pomdp.getNumberOfStates(), storm::utility::zero()); + uint32_t observation = 0; + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + if (pomdp.getInitialStates()[state] == 1) { + distribution[state] = storm::utility::one(); + observation = pomdp.getObservation(state); + } + } + return Belief{id, observation, distribution}; + } + + template + void ApproximatePOMDPModelchecker::constructBeliefGrid( + storm::models::sparse::Pomdp const &pomdp, + std::set target_observations, uint64_t gridResolution, + std::vector> &grid, std::vector &beliefIsKnown) { + bool isTarget; + uint64_t newId = 0; + + for (uint32_t observation = 0; observation < pomdp.getNrObservations(); ++observation) { + std::vector statesWithObservation = pomdp.getStatesWithObservation(observation); + isTarget = target_observations.find(observation) != target_observations.end(); + + // TODO this can probably be condensed + if (statesWithObservation.size() == 1) { + // If there is only one state with the observation, we can directly add the corresponding belief + std::vector distribution(pomdp.getNumberOfStates(), + storm::utility::zero()); + distribution[statesWithObservation.front()] = storm::utility::one(); + Belief belief = {newId, observation, distribution}; + STORM_LOG_TRACE( + "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," + << distribution << "]"); + grid.push_back(belief); + beliefIsKnown.push_back(isTarget); + ++newId; + } else { + // Otherwise we have to enumerate all possible distributions with regards to the grid + // helper is used to derive the distribution of the belief + std::vector helper(statesWithObservation.size(), 0); + helper[0] = gridResolution; + bool done = false; + uint64_t index = 0; + + while (!done) { + std::vector distribution(pomdp.getNumberOfStates(), + storm::utility::zero()); + for (size_t i = 0; i < statesWithObservation.size() - 1; ++i) { + distribution[statesWithObservation[i]] = ValueType( + double(helper[i] - helper[i + 1]) / gridResolution); + } + distribution[statesWithObservation.back()] = ValueType( + double(helper[statesWithObservation.size() - 1]) / gridResolution); + + Belief belief = {newId, observation, distribution}; + STORM_LOG_TRACE( + "Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) + << ")," << distribution << "]"); + grid.push_back(belief); + beliefIsKnown.push_back(isTarget); + if (helper[statesWithObservation.size() - 1] == gridResolution) { + // If the last entry of helper is the gridResolution, we have enumerated all necessary distributions + done = true; + } else { + // Update helper by finding the index to increment + index = statesWithObservation.size() - 1; + while (helper[index] == helper[index - 1]) { + --index; + } + STORM_LOG_ASSERT(index > 0, "Error in BeliefGrid generation - index wrong"); + // Increment the value at the index + ++helper[index]; + // Reset all indices greater than the changed one to 0 + ++index; + while (index < statesWithObservation.size()) { + helper[index] = 0; + ++index; + } + } + ++newId; + } + } + } + } + + template + std::pair>, std::vector> + ApproximatePOMDPModelchecker::computeSubSimplexAndLambdas( + std::vector probabilities, uint64_t resolution) { + // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math) + // Variable names are based on the paper + + std::vector x(probabilities.size(), storm::utility::zero()); + std::vector v(probabilities.size(), storm::utility::zero()); + std::vector d(probabilities.size(), storm::utility::zero()); + + for (size_t i = 0; i < probabilities.size(); ++i) { + for (size_t j = i; j < probabilities.size(); ++j) { + x[i] += storm::utility::convertNumber(resolution) * probabilities[j]; + } + v[i] = storm::utility::floor(x[i]); + d[i] = x[i] - v[i]; + } + + auto p = storm::utility::vector::getSortedIndices(d); + + std::vector> qs; + for (size_t i = 0; i < probabilities.size(); ++i) { + std::vector q; + if (i == 0) { + for (size_t j = 0; j < probabilities.size(); ++j) { + q[i] = v[i]; + } + qs.push_back(q); + } else { + for (size_t j = 0; j < probabilities.size(); ++j) { + if (j == p[i - 1]) { + q[j] = qs[i - 1][j] + storm::utility::one(); + } else { + q[j] = qs[i - 1][j]; + } + } + qs.push_back(q); + } + } + + std::vector> subSimplex; + for (auto q : qs) { + std::vector node; + for (size_t i = 0; i < probabilities.size(); ++i) { + if (i != probabilities.size() - 1) { + node.push_back((q[i] - q[i + 1]) / storm::utility::convertNumber(resolution)); + } else { + node.push_back(q[i] / storm::utility::convertNumber(resolution)); + } + } + subSimplex.push_back(node); + } + + std::vector lambdas(probabilities.size(), storm::utility::zero()); + auto sum = storm::utility::zero(); + for (size_t i = 1; i < probabilities.size(); ++i) { + lambdas[i] = d[p[i - 1]] - d[p[i]]; + sum += d[p[i - 1]] - d[p[i]]; + } + lambdas[0] = storm::utility::one() - sum; + + //TODO add assertion that we are close enough + return std::make_pair(subSimplex, lambdas); + } + + + template + std::map + ApproximatePOMDPModelchecker::computeObservationProbabilitiesAfterAction( + storm::models::sparse::Pomdp const &pomdp, Belief belief, + uint64_t actionIndex) { + std::map res; + // the id is not important here as we immediately discard the belief (very hacky, I don't like it either) + std::vector postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex, + 0).probabilities; + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + uint32_t observation = pomdp.getObservation(state); + if (postProbabilities[state] != storm::utility::zero()) { + if (res.count(observation) == 0) { + res[observation] = postProbabilities[state]; + } else { + res[observation] += postProbabilities[state]; + } + } + } + return res; + } + + template + Belief ApproximatePOMDPModelchecker::getBeliefAfterAction( + storm::models::sparse::Pomdp const &pomdp, Belief belief, + uint64_t actionIndex, uint64_t id) { + std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); + uint32_t observation = 0; + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + if (belief.probabilities[state] != storm::utility::zero()) { + auto row = pomdp.getTransitionMatrix().getRow( + pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); + for (auto const &entry : row) { + observation = pomdp.getObservation(entry.getColumn()); + distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); + } + } + } + /* Should not be necessary + // We have to normalize the distribution + auto sum = storm::utility::zero(); + for(ValueType const& entry : distributionAfter){ + sum += entry; + } + for(size_t i = 0; i < pomdp.getNumberOfStates(); ++i){ + distributionAfter[i] /= sum; + }*/ + return Belief{id, observation, distributionAfter}; + } + + template + Belief + ApproximatePOMDPModelchecker::getBeliefAfterActionAndObservation( + storm::models::sparse::Pomdp const &pomdp, Belief belief, + uint64_t actionIndex, uint32_t observation, uint64_t id) { + std::vector distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero()); + for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { + if (belief.probabilities[state] != storm::utility::zero()) { + auto row = pomdp.getTransitionMatrix().getRow( + pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex))); + for (auto const &entry : row) { + if (pomdp.getObservation(entry.getColumn()) == observation) { + distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue(); + } + } + } + } + // We have to normalize the distribution + auto sum = storm::utility::zero(); + for (ValueType const &entry : distributionAfter) { + sum += entry; + } + for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) { + distributionAfter[i] /= sum; + } + return Belief{id, observation, distributionAfter}; + } + + + template + class ApproximatePOMDPModelchecker; + +#ifdef STORM_HAVE_CARL + + //template class ApproximatePOMDPModelchecker; + template + class ApproximatePOMDPModelchecker; + +#endif + } + } +} \ No newline at end of file diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h new file mode 100644 index 000000000..4a4028288 --- /dev/null +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h @@ -0,0 +1,118 @@ +#include +#include "storm/modelchecker/CheckTask.h" +#include "storm/models/sparse/Pomdp.h" +#include "storm/utility/logging.h" + +namespace storm { + namespace pomdp { + namespace modelchecker { + class POMDPCheckResult; + + // Structure used to represent a belief + template + struct Belief { + uint64_t id; + uint32_t observation; + //TODO make this sparse? + std::vector probabilities; + }; + + template> + class ApproximatePOMDPModelchecker { + public: + explicit ApproximatePOMDPModelchecker(); + + /*std::unique_ptr*/ void + computeReachabilityProbability(storm::models::sparse::Pomdp const &pomdp, + std::set target_observations, bool min, + uint64_t gridResolution); + + std::unique_ptr + computeReachabilityReward(storm::models::sparse::Pomdp const &pomdp, + std::set target_observations, uint64_t gridResolution); + + private: + /** + * + * @param pomdp + * @param id + * @return + */ + Belief + getInitialBelief(storm::models::sparse::Pomdp const &pomdp, uint64_t id); + + + /** + * + * @param probabilities + * @param gridResolution + * @return + */ + std::pair>, std::vector> + computeSubSimplexAndLambdas(std::vector probabilities, uint64_t gridResolution); + + + /** + * Helper method to construct the grid of Belief states to approximate the POMDP + * + * @param pomdp + * @param gridResolution + * + */ + void constructBeliefGrid(storm::models::sparse::Pomdp const &pomdp, + std::set target_observations, uint64_t gridResolution, + std::vector> &grid, std::vector &beliefIsKnown); + + + /** + * Helper method to get the probabilities of each observation after performing an action + * + * @param pomdp + * @param belief + * @param actionIndex + * @return + */ + std::map computeObservationProbabilitiesAfterAction( + storm::models::sparse::Pomdp const &pomdp, Belief belief, + uint64_t actionIndex); + + /** + * Helper method to get the next belief that results from a belief by performing an action and observing an observation + * + * @param pomdp the POMDP on which the evaluation should be performed + * @param belief the starting belief + * @param actionIndex the index of the action to be performed + * @param observation the observation after the action was performed + * @return the resulting belief (observation and distribution) + */ + Belief + getBeliefAfterActionAndObservation(const models::sparse::Pomdp &pomdp, + Belief belief, + uint64_t actionIndex, uint32_t observation, uint64_t id); + + /** + * Helper method to get the next belief that results from a belief by performing an action + * + * @param pomdp + * @param belief + * @param actionIndex + * @return + */ + Belief + getBeliefAfterAction(storm::models::sparse::Pomdp const &pomdp, + Belief belief, uint64_t actionIndex, uint64_t id); + + /** + * Helper to get the id of a Belief in the grid + * + * @param observation + * @param probabilities + * @return + */ + uint64_t getBeliefIdInGrid(std::vector> &grid, uint32_t observation, + std::vector probabilities); + }; + + } + } +} \ No newline at end of file diff --git a/src/storm/models/sparse/Pomdp.cpp b/src/storm/models/sparse/Pomdp.cpp index b3930822c..babc81e6b 100644 --- a/src/storm/models/sparse/Pomdp.cpp +++ b/src/storm/models/sparse/Pomdp.cpp @@ -59,8 +59,17 @@ namespace storm { return observations; } - - + template + std::vector + Pomdp::getStatesWithObservation(uint32_t observation) const { + std::vector result; + for (uint64_t state = 0; state < this->getNumberOfStates(); ++state) { + if (this->getObservation(state) == observation) { + result.push_back(state); + } + } + return result; + } template class Pomdp; template class Pomdp; diff --git a/src/storm/models/sparse/Pomdp.h b/src/storm/models/sparse/Pomdp.h index de77ef6f0..d480deebc 100644 --- a/src/storm/models/sparse/Pomdp.h +++ b/src/storm/models/sparse/Pomdp.h @@ -61,7 +61,7 @@ namespace storm { std::vector const& getObservations() const; - + std::vector getStatesWithObservation(uint32_t observation) const; protected: // TODO: consider a bitvector based presentation (depending on our needs).